preda · tdulcet · Dec 26, 2025 · Dec 26, 2025 · Dec 26, 2025 · Dec 30, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -13,28 +13,31 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-22.04, ubuntu-24.04]
+        os: [ubuntu-22.04, ubuntu-24.04, ubuntu-22.04-arm, ubuntu-24.04-arm]
         cxx: [g++, clang++]
+        exclude:
+        - os: ubuntu-22.04-arm
+          cxx: clang++
       fail-fast: false
     env:
       CXX: ${{ matrix.cxx }}
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
     - name: Install
       run: |
         sudo apt-get update -y
         sudo apt-get install -y cppcheck ocl-icd-opencl-dev pocl-opencl-icd
         $CXX --version
     - name: Script
       run: |
-        make prpll -O -j "$(nproc)"
+        make -O -j "$(nproc)"
         cd build-release
         rm -f -- *.o
         ./prpll -h
-    - uses: actions/upload-artifact@v4
+    - uses: actions/upload-artifact@v7
       if: always()
       with:
-        name: ${{ matrix.os }}_${{ matrix.cxx }}_prpll
+        name: ${{ matrix.os }}_${{ endsWith(matrix.os, '-arm') && 'arm' || 'x86' }}_${{ matrix.cxx }}_prpll
         path: ${{ github.workspace }}
     - name: Cppcheck
       run: cppcheck --enable=all --force .
@@ -49,15 +52,17 @@ jobs:
   Windows:
     name: Windows
 
-    runs-on: windows-latest
+    runs-on: ${{ matrix.os }}
     strategy:
       matrix:
+        os: [windows-latest] # windows-11-arm
         cxx: [g++, clang++]
       fail-fast: false
     env:
       CXX: ${{ matrix.cxx }}
+      PACKAGE_PREFIX: mingw-w64-${{ endsWith(matrix.os, '-arm') && 'clang-aarch64' || 'x86_64' }}-
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
     - name: Before Install
       run: |
         echo "C:\msys64\mingw64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
@@ -66,42 +71,48 @@ jobs:
         echo "LIBPATH=-LC:\msys64\mingw64\lib" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
     - name: Install
       run: |
-        pacman -S --noconfirm mingw-w64-x86_64-gmp mingw-w64-x86_64-opencl-icd
+        pacman -S --noconfirm "${env:PACKAGE_PREFIX}opencl-icd"
         & $env:CXX --version
     - name: Install Clang
       if: ${{ matrix.cxx == 'clang++' }}
       run: |
-        pacman -S --noconfirm mingw-w64-x86_64-clang
+        pacman -S --noconfirm "${env:PACKAGE_PREFIX}clang" # same arch-specific prefix as the Install step
         & $env:CXX --version
     - name: Script
-      run: | # Cannot use `make exe`, as the OpenCL ICD Loader does not support static linking
-        make prpll -O -j $env:NUMBER_OF_PROCESSORS
+      run: |
+        make -O -j $env:NUMBER_OF_PROCESSORS
         cd build-release
         rm *.o
         .\prpll.exe -h
-    - uses: actions/upload-artifact@v4
+    - uses: actions/upload-artifact@v7
       if: always()
       with:
-        name: win_${{ matrix.cxx }}_prpll
+        name: win_${{ endsWith(matrix.os, '-arm') && 'arm' || 'x86' }}_${{ matrix.cxx }}_prpll
         path: ${{ github.workspace }}
 
   macOS:
     name: macOS
 
-    runs-on: macos-13
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [macos-15-intel, macos-latest]
+      fail-fast: false
+    env:
+      CXX: g++-15
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
     - name: Install
       run: |
-        brew install gcc@14
+        $CXX --version
     - name: Script
       run: |
-        make prpll -j "$(sysctl -n hw.ncpu)"
+        make -j "$(sysctl -n hw.ncpu)"
         cd build-release
         rm -f -- *.o
         ./prpll -h
-    - uses: actions/upload-artifact@v4
+    - uses: actions/upload-artifact@v7
       if: always()
       with:
-        name: macos_prpll
+        name: macos_${{ endsWith(matrix.os, '-intel') && 'x86' || 'arm' }}_prpll
         path: ${{ github.workspace }}
diff --git a/Makefile b/Makefile
@@ -14,9 +14,13 @@ HOST_OS = $(shell uname -s)
 
+# CXX is predefined by GNU make (origin "default"), so "CXX ?= ..." would never assign.
+# Pick our compiler only while CXX still has that built-in default, so a CXX from the environment or command line wins.
+ifeq ($(origin CXX), default)
 ifeq ($(HOST_OS), Darwin)
 # Real GCC (not clang), needed for 128-bit floats and std::filesystem::path
-CXX = g++-14
+CXX = g++-15
 else
 CXX = g++
 endif
+endif
 
 ifneq ($(findstring MINGW, $(HOST_OS)), MINGW)
@@ -45,7 +49,7 @@ else
 
 BIN=build-release
 
-CXXFLAGS = -O2 -DNDEBUG $(COMMON_FLAGS)
+CXXFLAGS = -O3 -DNDEBUG $(COMMON_FLAGS)
 STRIP=-s
 
 endif
@@ -90,7 +94,7 @@ $(BIN)/%.o : src/%.cpp $(DEPDIR)/%.d
 # src/bundle.cpp is just a wrapping of the OpenCL sources (*.cl) as a C string.
 
 src/bundle.cpp: genbundle.sh src/cl/*.cl
-	./genbundle.sh $^ > src/bundle.cpp
+	bash genbundle.sh $^ > src/bundle.cpp
 
 $(DEPDIR)/%.d: ;
 .PRECIOUS: $(DEPDIR)/%.d

diff --git a/genbundle.sh b/genbundle.sh
@@ -1,3 +1,4 @@
+#!/bin/bash
 cat <<EOM
 // Copyright (C) Mihai Preda
 // Generated file, do not edit. See genbundle.sh and src/cl/*.cl
@@ -9,24 +10,24 @@ EOM
 
 names=
 
-for xx in $*
+for xx in "$@"
 do
-    x=`basename $xx`
+    x=$(basename "$xx")
 
     if [ "$x" = "genbundle.sh" ] ; then continue ; fi
 
     names=${names}\"${x}\",
 
-    echo // $xx
+    echo "// $xx"
     #echo const char ${x}_cl[] = R\"cltag\(
-    echo R\"cltag\(
-    cat $xx
-    echo \)cltag\"\,
+    echo 'R"cltag('
+    cat "$xx"
+    echo ')cltag",'
     echo
 done
-echo \}\;
+echo '};'
 
-echo static const std::vector\<const char*\> CL_FILE_NAMES\{${names}\}\;
+echo "static const std::vector<const char*> CL_FILE_NAMES{${names}};"
 
 cat <<EOM
 const std::vector<const char*>& getClFileNames() { return CL_FILE_NAMES; }

diff --git a/src/Args.cpp b/src/Args.cpp
@@ -86,7 +86,7 @@ void Args::readConfig(const fs::path& path) {
   }
 }
 
-u32 Args::getProofPow(u32 exponent) const {
+u32 Args::getProofPow(u64 exponent) const {
   if (proofPow == -1) { return ProofSet::bestPower(exponent); }
   assert(proofPow >= 1);
   return proofPow;
@@ -118,7 +118,7 @@ and should be able to run.
 PRPLL keeps the active tasks in per-worker files worktodo-0.txt, worktodo-1.txt etc in the local directory.
 These per-worker files are supplied from the global worktodo.txt file if -pool is used.
 In turn the global worktodo.txt can be supplied through the primenet.py script,
-either the one located at gpuowl/tools/primenet.py or https://download.mersenne.ca/primenet.py
+either the one located at gpuowl/tools/primenet.py or https://download.mersenne.ca/AutoPrimeNet
 
 It is also possible to manually add exponents by adding lines of the form "PRP=118063003" to worktodo-<N>.txt
 
@@ -310,9 +310,9 @@ void Args::parse(const string& line) {
     } else if (key == "-tune") {
       doTune = true;
       if (!s.empty()) { tune = s; }
-    } else if (key == "-ctune") {
-      doCtune = true;
-      if (!s.empty()) { ctune.push_back(s); }
+//    } else if (key == "-ctune") {
+//      doCtune = true;
+//      if (!s.empty()) { ctune.push_back(s); }
     } else if (key == "-ztune") {
       doZtune = true;
     } else if (key == "-carryTune") {

diff --git a/src/Args.h b/src/Args.h
@@ -30,7 +30,7 @@ class Args {
   bool uses(const std::string& key) const { return flags.find(key) != flags.end(); }
   int value(const std::string& key, int valNotFound = -1) const;
   void readConfig(const fs::path& path);
-  u32 getProofPow(u32 exponent) const;
+  u32 getProofPow(u64 exponent) const;
   string tailDir() const;
 
   bool hasFlag(const string& key) const;
@@ -78,8 +78,8 @@ class Args {
   u32 logStep = 20000;
   string fftSpec;
 
-  u32 prpExp = 0;
-  u32 llExp = 0;
+  u64 prpExp = 0;
+  u64 llExp = 0;
 
   size_t maxAlloc = 0;
 

diff --git a/src/FFTConfig.cpp b/src/FFTConfig.cpp
@@ -182,7 +182,7 @@ if (18.35 + 0.5 * (log2(13 * 1024 * 512) - log2(size())) > 19.0) return 19.0;
   return 18.35 + 0.5 * (log2(13 * 1024 * 512) - log2(size()));
 }
 
-bool FFTShape::needsLargeCarry(u32 E) const {
+bool FFTShape::needsLargeCarry(u64 E) const {
   return E / double(size()) > carry32BPW();
 }
 
@@ -271,12 +271,12 @@ float FFTConfig::maxBpw() const {
   return (carry == CARRY_32 && (shape.fft_type == FFT64 || shape.fft_type == FFT3231)) ? std::min(shape.carry32BPW(), b) : b;
 }
 
-FFTConfig FFTConfig::bestFit(const Args& args, u32 E, const string& spec) {
+FFTConfig FFTConfig::bestFit(const Args& args, u64 E, const string& spec) {
   // A FFT-spec was given, simply take the first FFT from the spec that can handle E
   if (!spec.empty()) {
     FFTConfig fft{spec};
     if (fft.maxExp() * args.fftOverdrive < E) {
-      log("Warning: %s (max %" PRIu64 ") may be too small for %u\n", fft.spec().c_str(), fft.maxExp(), E);
+      log("Warning: %s (max %" PRIu64 ") may be too small for %" PRIu64 "\n", fft.spec().c_str(), fft.maxExp(), E);
     }
     return fft;
   }
@@ -288,7 +288,7 @@ FFTConfig FFTConfig::bestFit(const Args& args, u32 E, const string& spec) {
     if (E <= e.fft.maxExp() * args.fftOverdrive) { return e.fft; }
   }
 
-  log("No FFTs found in tune.txt that can handle %u. Consider tuning with -tune\n", E);
+  log("No FFTs found in tune.txt that can handle %" PRIu64 ". Consider tuning with -tune\n", E);
 
   // Take the first FFT that can handle E
   for (const FFTShape& shape : FFTShape::allShapes()) {
@@ -297,7 +297,7 @@ FFTConfig FFTConfig::bestFit(const Args& args, u32 E, const string& spec) {
     }
   }
 
-  log("No FFT found for %u\n", E);
+  log("No FFT found for %" PRIu64 "\n", E);
   throw "No FFT";
 }
 

diff --git a/src/FFTConfig.h b/src/FFTConfig.h
@@ -27,7 +27,7 @@ class FFTShape {
 public:
   static std::vector<FFTShape> allShapes(u32 from=0, u32 to = -1);
 
-  static tuple<u32, u32, bool> getChainLengths(u32 fftSize, u32 exponent, u32 middle);
+  static tuple<u32, u32, bool> getChainLengths(u32 fftSize, u64 exponent, u32 middle);
 
   static vector<FFTShape> multiSpec(const string& spec);
 
@@ -51,7 +51,7 @@ class FFTShape {
   std::string spec() const { return (fft_type ? to_string(fft_type) + ':' : "") + numberK(width) + ':' + numberK(middle) + ':' + numberK(height); }
 
   float carry32BPW() const;
-  bool needsLargeCarry(u32 E) const;
+  bool needsLargeCarry(u64 E) const;
   bool isFavoredShape() const;
 };
 
@@ -73,7 +73,7 @@ enum CARRY_KIND {CARRY_32=0, CARRY_64=1, CARRY_AUTO=2};
 
 struct FFTConfig {
 public:
-  static FFTConfig bestFit(const Args& args, u32 E, const std::string& spec);
+  static FFTConfig bestFit(const Args& args, u64 E, const std::string& spec);
 
   // Which FP and NTT primes are involved in the FFT
   bool FFT_FP64;