diff --git a/modules/nf-core/huggingface/download/environment.yml b/modules/nf-core/huggingface/download/environment.yml new file mode 100644 index 00000000000..f2267b412f6 --- /dev/null +++ b/modules/nf-core/huggingface/download/environment.yml @@ -0,0 +1,6 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge +dependencies: + - conda-forge::huggingface_hub=1.6.0 diff --git a/modules/nf-core/huggingface/download/main.nf b/modules/nf-core/huggingface/download/main.nf new file mode 100644 index 00000000000..78c6211ac34 --- /dev/null +++ b/modules/nf-core/huggingface/download/main.nf @@ -0,0 +1,30 @@ +process HF_DOWNLOAD { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "community.wave.seqera.io/library/huggingface_hub:1.6.0--c106a7f9664ca39b" + + input: + tuple val(meta), val(hf_repo), val(hf_file), val(hf_home) + + output: + tuple val(meta), path(hf_file), emit: output + tuple val("${task.process}"), val("huggingface_hub"), eval("hf --version 2>&1 | tail -n1 | awk '{print \$NF}'"), topic: versions, emit: versions_huggingface_hub + + when: + task.ext.when == null || task.ext.when + + script: + def hf_home_resolved = hf_home ?: "${workflow.projectDir}/hf_cache" + """ + export HF_HOME="${hf_home_resolved}" + export HF_HUB_CACHE=\$HF_HOME + hf download ${hf_repo} ${hf_file} --local-dir \$PWD + """ + + stub: + """ + touch ${hf_file} + """ +} diff --git a/modules/nf-core/huggingface/download/meta.yml b/modules/nf-core/huggingface/download/meta.yml new file mode 100644 index 00000000000..a3186bce7b0 --- /dev/null +++ b/modules/nf-core/huggingface/download/meta.yml @@ -0,0 +1,66 @@ +name: hf_download +description: Tool for downloading models from HuggingFace +keywords: + - llm + - llama + - ai +tools: + - huggingface_hub: + description: "HuggingFace Hub CLI interface" + homepage: 
"https://huggingface.co/docs/huggingface_hub/guides/cli" + licence: + - "MIT" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]`- prompt_file: + - hf_repo: + type: string + description: HuggingFace repository + - hf_file: + type: string + description: HuggingFace GGUF file + - hf_home: + type: string + description: HuggingFace default cache directory +output: + output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - hf_file: + type: file + description: Downloaded HuggingFace GGUF file + ontologies: [] + versions_huggingface_hub: + - - ${task.process}: + type: string + description: The name of the process + - huggingface_hub: + type: string + description: The name of the tool + - hf --version 2>&1 | tail -n1 | awk '{print \$NF}': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - huggingface_hub: + type: string + description: The name of the tool + - hf --version 2>&1 | tail -n1 | awk '{print \$NF}': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@toniher" + - "@lucacozzuto" +maintainers: + - "@toniher" + - "@lucacozzuto" diff --git a/modules/nf-core/huggingface/download/tests/main.nf.test b/modules/nf-core/huggingface/download/tests/main.nf.test new file mode 100644 index 00000000000..2173c5d3289 --- /dev/null +++ b/modules/nf-core/huggingface/download/tests/main.nf.test @@ -0,0 +1,66 @@ +nextflow_process { + + name "Test Process HF_DOWNLOAD" + script "../main.nf" + process "HF_DOWNLOAD" + + tag "modules" + tag "modules_nfcore" + tag "huggingface" + tag "huggingface/download" + + test("download gguf file") { + + when { + process { + """ + input[0] = [ + [ id:'test_model' ], + "ggml-org/gemma-3-1b-it-GGUF", + "gemma-3-1b-it-Q4_K_M.gguf", + 
"./hf_cache" + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.output.size() == 1 }, + { assert process.out.output[0][0] == [ id:'test_model' ] }, + { assert file(process.out.output[0][1]).name == "gemma-3-1b-it-Q4_K_M.gguf" }, + { assert file(process.out.output[0][1]).size() > 0 }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("stub - download gguf file") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test_model' ], + "ggml-org/gemma-3-1b-it-GGUF", + "gemma-3-1b-it-Q4_K_M.gguf", + "./hf_cache" + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.output.size() == 1 }, + { assert process.out.output[0][0] == [ id:'test_model' ] }, + { assert file(process.out.output[0][1]).name == "gemma-3-1b-it-Q4_K_M.gguf" }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } +} diff --git a/modules/nf-core/huggingface/download/tests/main.nf.test.snap b/modules/nf-core/huggingface/download/tests/main.nf.test.snap new file mode 100644 index 00000000000..5565340150b --- /dev/null +++ b/modules/nf-core/huggingface/download/tests/main.nf.test.snap @@ -0,0 +1,38 @@ +{ + "stub - download gguf file": { + "content": [ + { + "versions_huggingface_hub": [ + [ + "HF_DOWNLOAD", + "huggingface_hub", + "1.6.0" + ] + ] + } + ], + "timestamp": "2026-03-26T08:39:57.919278809", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "download gguf file": { + "content": [ + { + "versions_huggingface_hub": [ + [ + "HF_DOWNLOAD", + "huggingface_hub", + "1.6.0" + ] + ] + } + ], + "timestamp": "2026-03-26T08:38:24.630341776", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} diff --git a/modules/nf-core/huggingface/download/tests/nextflow.config b/modules/nf-core/huggingface/download/tests/nextflow.config new file mode 100644 index 
00000000000..68a5630f57c --- /dev/null +++ b/modules/nf-core/huggingface/download/tests/nextflow.config @@ -0,0 +1,7 @@ +nextflow.enable.moduleBinaries = true + +process { + withName: 'HF_DOWNLOAD' { + containerOptions = { workflow.profile.contains('docker') ? "--volume ${projectDir}/hf_cache:${projectDir}/hf_cache" : '' } + } +} diff --git a/modules/nf-core/llamacpp-python/run/Dockerfile b/modules/nf-core/llamacpp-python/run/Dockerfile new file mode 100644 index 00000000000..aeb89834ee1 --- /dev/null +++ b/modules/nf-core/llamacpp-python/run/Dockerfile @@ -0,0 +1,5 @@ +FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 + +RUN apt-get update && apt-get install -y python3 python3-pip +RUN pip3 install llama-cpp-python \ + --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124 diff --git a/modules/nf-core/llamacpp-python/run/environment.yml b/modules/nf-core/llamacpp-python/run/environment.yml new file mode 100644 index 00000000000..9f314201924 --- /dev/null +++ b/modules/nf-core/llamacpp-python/run/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::llama-cpp-python=0.3.16 diff --git a/modules/nf-core/llamacpp-python/run/main.nf b/modules/nf-core/llamacpp-python/run/main.nf new file mode 100644 index 00000000000..5dec0f6bcf2 --- /dev/null +++ b/modules/nf-core/llamacpp-python/run/main.nf @@ -0,0 +1,36 @@ +process LLAMACPP_PYTHON_RUN { + tag "$meta.id" + label 'process_medium' + label 'process_gpu' + + conda "${moduleDir}/environment.yml" + container "${ task.ext.use_gpu ? 
'quay.io/nf-core/llama-cpp-python:0.1.9' : 'community.wave.seqera.io/library/llama-cpp-python:0.3.16--b351398cd0ea7fc5' }" + + input: + tuple val(meta), path(prompt_file), path(gguf_model) + + output: + tuple val(meta), path("output.txt"), emit: output + tuple val("${task.process}"), val("llama-cpp-python"), eval("python3 -c 'import llama_cpp; print(llama_cpp.__version__)'"), topic: versions, emit: versions_llama_cpp_python + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + llamacpp-python.py \ + --model ${gguf_model} \ + --messages ${prompt_file} \ + --output output.txt \ + ${args} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch output.txt + + """ +} diff --git a/modules/nf-core/llamacpp-python/run/meta.yml b/modules/nf-core/llamacpp-python/run/meta.yml new file mode 100644 index 00000000000..70d7bbf4f16 --- /dev/null +++ b/modules/nf-core/llamacpp-python/run/meta.yml @@ -0,0 +1,69 @@ +name: llamacpp_python_run +description: Python wrapper for running locally-hosted LLM with llama.cpp +keywords: + - llm + - llama + - ai +tools: + - llama-cpp-python: + description: "Python wrapper for llama.cpp LLM inference tool" + homepage: "https://llama-cpp-python.readthedocs.io/en/latest/" + licence: + - "MIT" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - prompt_file: + type: file + description: | + Prompt file + Structure: [ val(meta), path(prompt_file) ] + ontologies: [] + - gguf_model: + type: file + description: | + GGUF model + Structure: [ val(meta), path(gguf_model) ] + ontologies: [] +output: + output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "output.txt": + type: file + description: File with the output of LLM inference request + ontologies: [] + versions_llama_cpp_python: + - - ${task.process}: + type: string + description: The name of the process + - llama-cpp-python: + type: string + description: The name of the tool + - python3 -c 'import llama_cpp; print(llama_cpp.__version__)': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - llama-cpp-python: + type: string + description: The name of the tool + - python3 -c 'import llama_cpp; print(llama_cpp.__version__)': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@toniher" + - "@lucacozzuto" +maintainers: + - "@toniher" + - "@lucacozzuto" diff --git a/modules/nf-core/llamacpp-python/run/resources/usr/bin/llamacpp-python.py b/modules/nf-core/llamacpp-python/run/resources/usr/bin/llamacpp-python.py new file mode 100755 index 00000000000..4e332bca633 --- /dev/null +++ b/modules/nf-core/llamacpp-python/run/resources/usr/bin/llamacpp-python.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 + +import argparse +import json +import os +import sys + +import llama_cpp + + +# Helper to create messages from a text file +def create_messages_from_textfile(textfile, system_prompt): + try: + with open(textfile, encoding="utf-8") as f: + content = f.read() + return [ + {"role": "system", "content": system_prompt.strip()}, + {"role": "user", "content": content.strip()}, + ] + except Exception as e: + print(f"Error reading text file '{textfile}': {e}", file=sys.stderr) + sys.exit(1) + + +# Helper to load messages from JSON or fallback to text +def load_messages(messages_file, system_prompt): + if not os.path.exists(messages_file): + print(f"Messages file '{messages_file}' does not exist.", file=sys.stderr) + sys.exit(1) + try: + with open(messages_file, encoding="utf-8") as f: + 
content = f.read() + try: + return json.loads(content) + except json.JSONDecodeError: + return create_messages_from_textfile(messages_file, system_prompt) + except Exception as e: + print(f"Error opening messages file '{messages_file}': {e}", file=sys.stderr) + sys.exit(1) + + +def llamacpp_python( + messages_file, + model_file, + temperature=0.9, + output="output.txt", + verbose=False, + context_size=2048, + chat_format="chatml", + seed=None, +): + if not os.path.exists(model_file): + print(f"Model file '{model_file}' does not exist.", file=sys.stderr) + sys.exit(1) + + # Default system prompt + system_prompt = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions" + + messages_json = load_messages(messages_file, system_prompt) + + try: + llm = llama_cpp.Llama( + model_path=model_file, + chat_format=chat_format, + n_ctx=context_size, + seed=seed, + ) + response = llm.create_chat_completion( + messages=messages_json, + response_format={"type": "json_object"}, + temperature=temperature, + ) + except Exception as e: + print(f"Error running llama_cpp: {e}", file=sys.stderr) + sys.exit(1) + + if not verbose: + try: + reply = response["choices"][0]["message"]["content"] + except (KeyError, IndexError, TypeError): + reply = response + # Try to parse reply as JSON if it's a string + if isinstance(reply, str): + try: + reply_json = json.loads(reply) + if isinstance(reply_json, dict) and len(reply_json) == 1: + reply = next(iter(reply_json.values())) + else: + reply = reply_json + except Exception: + pass # Leave reply as string if not valid JSON + elif isinstance(reply, dict) and len(reply) == 1: + reply = next(iter(reply.values())) + else: + reply = response + + try: + with open(output, "w", encoding="utf-8") as f: + if isinstance(reply, str): + f.write(reply) + else: + f.write(json.dumps(reply, indent=2)) + if verbose: + print(f"Output written to {output}") + except 
Exception as e: + print(f"Error writing output file '{output}': {e}", file=sys.stderr) + sys.exit(1) + + +def main(): + parser = argparse.ArgumentParser(description="Submit a process with model.") + parser.add_argument("-s", "--messages", required=True, help="JSON message") + parser.add_argument("-m", "--model", required=True, help="Model used") + parser.add_argument("-t", "--temperature", default=0.9, type=float, help="Temperature") + parser.add_argument("-o", "--output", default="output.txt", help="Output text") + parser.add_argument("-c", "--context", default=2048, type=int, help="Context size") + parser.add_argument("--chat_format", default="chatml", help="Chat format") + parser.add_argument("--seed", default=None, type=int, help="Defined seed") + parser.add_argument("--verbose", action="store_true", help="Verbose output") + + args = parser.parse_args() + llamacpp_python( + messages_file=args.messages, + model_file=args.model, + temperature=args.temperature, + output=args.output, + verbose=args.verbose, + context_size=args.context, + chat_format=args.chat_format, + seed=args.seed, + ) + + +if __name__ == "__main__": + main() diff --git a/modules/nf-core/llamacpp-python/run/tests/data/prompt.json b/modules/nf-core/llamacpp-python/run/tests/data/prompt.json new file mode 100644 index 00000000000..52d9ea3e2aa --- /dev/null +++ b/modules/nf-core/llamacpp-python/run/tests/data/prompt.json @@ -0,0 +1,10 @@ +[ + { + "role": "system", + "content": "A chat between a curious user and an artificial intelligence assistant. 
The assistant gives helpful, detailed, and polite answers to the user's questions" + }, + { + "role": "user", + "content": "Describe Barcelona in one paragraph" + } +] diff --git a/modules/nf-core/llamacpp-python/run/tests/data/stub_model.gguf b/modules/nf-core/llamacpp-python/run/tests/data/stub_model.gguf new file mode 100644 index 00000000000..e69de29bb2d diff --git a/modules/nf-core/llamacpp-python/run/tests/main.nf.test b/modules/nf-core/llamacpp-python/run/tests/main.nf.test new file mode 100644 index 00000000000..dc789505242 --- /dev/null +++ b/modules/nf-core/llamacpp-python/run/tests/main.nf.test @@ -0,0 +1,81 @@ +nextflow_process { + + name "Test Process LLAMACPP_PYTHON_RUN" + script "../main.nf" + process "LLAMACPP_PYTHON_RUN" + + tag "modules" + tag "modules_nfcore" + tag "llamacpp-python" + tag "llamacpp-python/run" + tag "huggingface/download" + + test("run inference with downloaded gguf model") { + + config "./nextflow.config" + + setup { + run("HF_DOWNLOAD") { + script "../../../huggingface/download/main.nf" + process { + """ + input[0] = [ + [ id:'test_model' ], + "ggml-org/gemma-3-1b-it-GGUF", + "gemma-3-1b-it-Q4_K_M.gguf", + "./hf_cache" + ] + """ + } + } + } + + when { + process { + """ + input[0] = HF_DOWNLOAD.out.output.map { meta, model -> + [ [ id:'test_run' ], file("${moduleDir}/tests/data/prompt.json"), model ] + } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.output.size() == 1 }, + { assert process.out.output[0][0] == [ id:'test_run' ] }, + { assert file(process.out.output[0][1]).name == "output.txt" }, + { assert file(process.out.output[0][1]).size() > 0 }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("stub - run inference with json prompt") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test_run' ], + file("${moduleDir}/tests/data/prompt.json"), + file("${moduleDir}/tests/data/stub_model.gguf") + 
] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.output.size() == 1 }, + { assert process.out.output[0][0] == [ id:'test_run' ] }, + { assert file(process.out.output[0][1]).name == "output.txt" }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } +} diff --git a/modules/nf-core/llamacpp-python/run/tests/main.nf.test.snap b/modules/nf-core/llamacpp-python/run/tests/main.nf.test.snap new file mode 100644 index 00000000000..4bd263f10e6 --- /dev/null +++ b/modules/nf-core/llamacpp-python/run/tests/main.nf.test.snap @@ -0,0 +1,38 @@ +{ + "run inference with downloaded gguf model": { + "content": [ + { + "versions_llama_cpp_python": [ + [ + "LLAMACPP_PYTHON_RUN", + "llama-cpp-python", + "0.3.16" + ] + ] + } + ], + "timestamp": "2026-03-25T18:33:08.596485421", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "stub - run inference with json prompt": { + "content": [ + { + "versions_llama_cpp_python": [ + [ + "LLAMACPP_PYTHON_RUN", + "llama-cpp-python", + "0.3.16" + ] + ] + } + ], + "timestamp": "2026-03-25T17:23:21.997786726", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} diff --git a/modules/nf-core/llamacpp-python/run/tests/nextflow.config b/modules/nf-core/llamacpp-python/run/tests/nextflow.config new file mode 100644 index 00000000000..6806a86ca3e --- /dev/null +++ b/modules/nf-core/llamacpp-python/run/tests/nextflow.config @@ -0,0 +1,10 @@ +nextflow.enable.moduleBinaries = true + +process { + withName: 'HF_DOWNLOAD' { + containerOptions = { workflow.profile.contains('docker') ? "--volume ${projectDir}/hf_cache:${projectDir}/hf_cache" : '' } + } + withName: 'LLAMACPP_PYTHON_RUN' { + ext.args = "--seed 42" + } +}