diff --git a/README.md b/README.md index fc8e2c9e359..09e0a7a8323 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ - [WhiteBox SSD Mode (OCSSD)](#whitebox-ssd-mode-ocssd) - [Zoned Namespace SSD Mode (ZNSSD)](#zoned-namespace-ssd-mode-znssd) - [NoSSD Mode](#nossd-mode) + - [Computational Storage Mode (CSD)](#computational-storage-mode-csd) - [Configuration](#configuration) - [Development](#development) - [Troubleshooting](#troubleshooting) @@ -62,12 +63,12 @@ FEMU bridges the gap between SSD hardware platforms and SSD simulators by provid ## Features -| Feature | BlackBox | WhiteBox | ZNS | NoSSD | -|---------|----------|----------|-----|--------| -| **FTL Management** | Device-side | Host-side | Zone-based | None | -| **Use Cases** | Commercial SSD simulation | OpenChannel SSD research | ZNS research | SCM emulation | -| **Latency Model** | Realistic NAND | Realistic NAND | Zone-optimized | Ultra-low (sub-10μs) | -| **Guest Support** | Full NVMe | OpenChannel 1.2/2.0 | NVMe ZNS | NVMe basic | +| Feature | BlackBox | WhiteBox | ZNS | NoSSD | CSD | +|---------|----------|----------|-----|--------|-----| +| **FTL Management** | Device-side | Host-side | Zone-based | None | Device-side | +| **Use Cases** | Commercial SSD simulation | OpenChannel SSD research | ZNS research | SCM emulation | Computational storage research | +| **Latency Model** | Realistic NAND | Realistic NAND | Zone-optimized | Ultra-low (sub-10μs) | Realistic NAND + compute runtime | +| **Guest Support** | Full NVMe | OpenChannel 1.2/2.0 | NVMe ZNS | NVMe basic | Full NVMe + CSD commands | --- @@ -367,6 +368,42 @@ Ultra-fast NVMe emulation without storage logic. - Performance upper-bound testing - Fast storage prototyping +### Computational Storage Mode (CSD) + +Experimental computational storage support derived from CEMU. CSD is selected +with `femu_mode=4` and keeps CSD-specific code under `hw/femu/csd/`. 
+ +```bash +./run-csd.sh +``` + +**Key Parameters:** +```bash +fdm_size=64 # Functional data memory size (MB), required +nr_cu=4 # Number of compute units +nr_thread=4 # Number of functional simulation threads +time_slice=200000 # Scheduler time slice (ns) +context_switch_time=200 # Context switch time (ns) +csf_runtime_scale=3 # Runtime scaling factor +``` + +**Current Scope:** +- Normal NVMe read/write through the device-side BBSSD FTL path in CSD mode +- Vendor commands for allocated functional data memory (AFDM) allocation, read/write, and NVM-to-AFDM copy +- Phantom and shared-library computational storage function (CSF) load/execute path using the original CEMU + lifecycle, `path\0symbol\0` program descriptor format, and program execute + fields (`pind`, `numr`, `dlen`, `cparam1`, `cparam2`, `group`, `runtime`) +- CEMU-style admin commands for CSF load/unload and activate/deactivate +- Optional uBPF CSF support via `./femu-compile.sh --enable-csd-ubpf` + or `./femu-compile.sh --enable-csd-ubpf=/path/to/ubpf-cemu` +- Group/QoS command metadata +- Guest-side passthrough tests in `tests/femu-csd/` + +The initial CSD path does not require a CEMU-specific Linux kernel, FDMFS, or a +fixed VM image. Advanced CEMU features such as VM freezing, virtual clock +changes, and FDMFS are intentionally kept out of the default path while the base +mode is upstreamed.
+ --- ## Configuration @@ -451,6 +488,9 @@ hw/femu/ # Main FEMU implementation │ └── zftl.c # Zone-based FTL ├── nossd/ # NoSSD mode │ └── nop.c # Minimal processing +├── csd/ # Computational Storage mode +│ ├── csd.c # CSD command handling +│ └── csd.h # CSD private command definitions ├── timing-model/ # Performance modeling ├── backend/ # Storage backends └── lib/ # Utility libraries diff --git a/femu-scripts/femu-compile.sh b/femu-scripts/femu-compile.sh index 8cd710cc20d..24988632a6a 100755 --- a/femu-scripts/femu-compile.sh +++ b/femu-scripts/femu-compile.sh @@ -1,10 +1,28 @@ #!/bin/bash NRCPUS="$(cat /proc/cpuinfo | grep "vendor_id" | wc -l)" +FEMU_CONFIGURE_OPTS="" + +for arg in "$@"; do + case "$arg" in + --enable-csd-ubpf) + FEMU_CONFIGURE_OPTS="${FEMU_CONFIGURE_OPTS} --enable-femu-csd-ubpf" + ;; + --enable-csd-ubpf=*) + UBPF_PATH="${arg#*=}" + FEMU_CONFIGURE_OPTS="${FEMU_CONFIGURE_OPTS} --enable-femu-csd-ubpf -Dfemu_csd_ubpf_path=${UBPF_PATH}" + ;; + *) + echo "Unknown option: $arg" + echo "Usage: $0 [--enable-csd-ubpf[=/path/to/ubpf-cemu]]" + exit 1 + ;; + esac +done make clean # --disable-werror --extra-cflags=-w --disable-git-update -../configure --enable-kvm --target-list=x86_64-softmmu --enable-slirp +../configure --enable-kvm --target-list=x86_64-softmmu --enable-slirp ${FEMU_CONFIGURE_OPTS} make -j $NRCPUS echo "" diff --git a/femu-scripts/femu-copy-scripts.sh b/femu-scripts/femu-copy-scripts.sh index 8e49e319186..598a956688a 100755 --- a/femu-scripts/femu-copy-scripts.sh +++ b/femu-scripts/femu-copy-scripts.sh @@ -4,7 +4,7 @@ FSD="../femu-scripts" -CPL=(pkgdep.sh femu-compile.sh run-whitebox.sh run-blackbox.sh run-blackbox-fdp.sh run-nossd.sh run-zns.sh pin.sh ftk) +CPL=(pkgdep.sh femu-compile.sh run-whitebox.sh run-blackbox.sh run-blackbox-fdp.sh run-nossd.sh run-zns.sh run-csd.sh pin.sh ftk) echo "" echo "==> Copying following FEMU script to current directory:" @@ -18,4 +18,3 @@ do done echo "Done!" 
echo "" - diff --git a/femu-scripts/run-csd.sh b/femu-scripts/run-csd.sh new file mode 100755 index 00000000000..496c7121e4c --- /dev/null +++ b/femu-scripts/run-csd.sh @@ -0,0 +1,88 @@ +#!/bin/bash +# Run FEMU as a Computational Storage Drive (CSD) + +# Image directory +IMGDIR=$HOME/images +# Virtual machine disk image +OSIMGF=$IMGDIR/u20s.qcow2 + +# Configurable SSD Controller layout parameters (must be power of 2) +secsz=512 # sector size in bytes +secs_per_pg=8 # number of sectors in a flash page +pgs_per_blk=256 # number of pages per flash block +blks_per_pl=256 # number of blocks per plane +pls_per_lun=1 # keep it at one, no multiplanes support +luns_per_ch=8 # number of chips per channel +nchs=8 # number of channels +ssd_size=4096 # in megabytes + +# Latency in nanoseconds +pg_rd_lat=40000 # page read latency +pg_wr_lat=200000 # page write latency +blk_er_lat=2000000 # block erase latency +ch_xfer_lat=0 # channel transfer time, ignored for now + +# GC Threshold (1-100) +gc_thres_pcent=75 +gc_thres_pcent_high=95 + +# FEMU CSD parameters +fdm_size=64 +nr_cu=4 +nr_thread=4 +time_slice=200000 +context_switch_time=200 +csf_runtime_scale=3 + +#----------------------------------------------------------------------- + +# Compose the entire FEMU CSD command line options +FEMU_OPTIONS="-device femu" +FEMU_OPTIONS=${FEMU_OPTIONS}",devsz_mb=${ssd_size}" +FEMU_OPTIONS=${FEMU_OPTIONS}",namespaces=1" +FEMU_OPTIONS=${FEMU_OPTIONS}",femu_mode=4" +FEMU_OPTIONS=${FEMU_OPTIONS}",secsz=${secsz}" +FEMU_OPTIONS=${FEMU_OPTIONS}",secs_per_pg=${secs_per_pg}" +FEMU_OPTIONS=${FEMU_OPTIONS}",pgs_per_blk=${pgs_per_blk}" +FEMU_OPTIONS=${FEMU_OPTIONS}",blks_per_pl=${blks_per_pl}" +FEMU_OPTIONS=${FEMU_OPTIONS}",pls_per_lun=${pls_per_lun}" +FEMU_OPTIONS=${FEMU_OPTIONS}",luns_per_ch=${luns_per_ch}" +FEMU_OPTIONS=${FEMU_OPTIONS}",nchs=${nchs}" +FEMU_OPTIONS=${FEMU_OPTIONS}",pg_rd_lat=${pg_rd_lat}" +FEMU_OPTIONS=${FEMU_OPTIONS}",pg_wr_lat=${pg_wr_lat}" 
+FEMU_OPTIONS=${FEMU_OPTIONS}",blk_er_lat=${blk_er_lat}" +FEMU_OPTIONS=${FEMU_OPTIONS}",ch_xfer_lat=${ch_xfer_lat}" +FEMU_OPTIONS=${FEMU_OPTIONS}",gc_thres_pcent=${gc_thres_pcent}" +FEMU_OPTIONS=${FEMU_OPTIONS}",gc_thres_pcent_high=${gc_thres_pcent_high}" +FEMU_OPTIONS=${FEMU_OPTIONS}",fdm_size=${fdm_size}" +FEMU_OPTIONS=${FEMU_OPTIONS}",nr_cu=${nr_cu}" +FEMU_OPTIONS=${FEMU_OPTIONS}",nr_thread=${nr_thread}" +FEMU_OPTIONS=${FEMU_OPTIONS}",time_slice=${time_slice}" +FEMU_OPTIONS=${FEMU_OPTIONS}",context_switch_time=${context_switch_time}" +FEMU_OPTIONS=${FEMU_OPTIONS}",csf_runtime_scale=${csf_runtime_scale}" + +echo ${FEMU_OPTIONS} + +if [[ ! -e "$OSIMGF" ]]; then + echo "" + echo "VM disk image couldn't be found ..." + echo "Please prepare a usable VM image and place it as $OSIMGF" + echo "Once VM disk image is ready, please rerun this script again" + echo "" + exit +fi + +sudo ./qemu-system-x86_64 \ + -name "FEMU-CSD-VM" \ + -enable-kvm \ + -cpu host \ + -smp 4 \ + -m 4G \ + -device virtio-scsi-pci,id=scsi0 \ + -device scsi-hd,drive=hd0 \ + -drive file=$OSIMGF,if=none,aio=native,cache=none,format=qcow2,id=hd0 \ + ${FEMU_OPTIONS} \ + -net user,hostfwd=tcp::8080-:22 \ + -net nic,model=virtio \ + -nographic \ + -qmp unix:./qmp-sock,server,nowait 2>&1 | tee log diff --git a/hw/femu/backend/dram.c b/hw/femu/backend/dram.c index a58068634c9..c34dc663b72 100644 --- a/hw/femu/backend/dram.c +++ b/hw/femu/backend/dram.c @@ -57,7 +57,8 @@ int backend_rw(SsdDramBackend *b, QEMUSGList *qsg, uint64_t *lbal, bool is_write mb_oft = lbal[sg_cur_index]; } else if (b->femu_mode == FEMU_BBSSD_MODE || b->femu_mode == FEMU_NOSSD_MODE || - b->femu_mode == FEMU_ZNSSD_MODE) { + b->femu_mode == FEMU_ZNSSD_MODE || + b->femu_mode == FEMU_CSD_MODE) { mb_oft += cur_len; } else { assert(0); diff --git a/hw/femu/csd/csd.c b/hw/femu/csd/csd.c new file mode 100644 index 00000000000..e2674b28218 --- /dev/null +++ b/hw/femu/csd/csd.c @@ -0,0 +1,1168 @@ +#include "qemu/osdep.h" +#include 
"qapi/error.h" +#include + +#include "csd.h" +#include "../bbssd/ftl.h" + +#ifdef CONFIG_FEMU_CSD_UBPF +#include +#endif + +typedef int64_t (*FemuCsdSharedLibFn)(FemuCsdArgs *args); + +#define CSD_EXEC_DATA_MAX (1U << 20) + +typedef struct FemuCsdAfdm { + uint32_t id; + uint64_t size; + uint8_t *data; +} FemuCsdAfdm; + +typedef struct FemuCsdProgram { + uint32_t id; + uint8_t type; + bool active; + bool indirect; + bool loading; + uint32_t runtime; + uint16_t runtime_scale; + uint64_t size; + uint64_t load_size; + uint64_t pid; + uint8_t *data; + GModule *module; + FemuCsdSharedLibFn shared_lib_fn; +#ifdef CONFIG_FEMU_CSD_UBPF + struct ubpf_vm *ubpf_vm; + ubpf_jit_fn ubpf_jit_fn; +#endif +} FemuCsdProgram; + +typedef struct FemuCsdGroup { + uint32_t id; + int8_t prio; + uint8_t qos_flags; + uint32_t bandwidth; + uint32_t deadline; +} FemuCsdGroup; + +typedef struct FemuCsdMrs { + uint16_t rsid; + uint32_t numr; + NvmeCsdMemoryRange *ranges; +} FemuCsdMrs; + +typedef struct FemuCsdState { + CsdCtrlParams params; + uint64_t fdm_capacity; + uint64_t fdm_used; + uint32_t next_afdm_id; + uint32_t next_group_id; + uint32_t next_rsid; + GHashTable *afdms; + GHashTable *programs; + GHashTable *groups; + GHashTable *mrs; + QemuMutex lock; +} FemuCsdState; + +static void csd_check_size(void) +{ + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdAllocFdmCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdDeallocAfdmCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdNvmToAfdmCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdExecCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdMemoryRange) != 32); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdReadAfdmCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdWriteAfdmCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdCreateGroupCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdSetQosCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdDeleteGroupCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdLoadProgramCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdProgramActivationCmd) != 64); + 
QEMU_BUILD_BUG_ON(sizeof(NvmeCsdMrsMgmtCmd) != 64); +} + +static FemuCsdState *csd_state(FemuCtrl *n) +{ + return n->ext_ops.state; +} + +static void csd_afdm_free(gpointer opaque) +{ + FemuCsdAfdm *afdm = opaque; + + if (!afdm) { + return; + } + + g_free(afdm->data); + g_free(afdm); +} + +static void csd_program_unload(FemuCsdProgram *program) +{ + if (program->module) { + g_module_close(program->module); + program->module = NULL; + program->shared_lib_fn = NULL; + } + +#ifdef CONFIG_FEMU_CSD_UBPF + if (program->ubpf_vm) { + ubpf_destroy(program->ubpf_vm); + program->ubpf_vm = NULL; + program->ubpf_jit_fn = NULL; + } +#endif +} + +static void csd_program_free(gpointer opaque) +{ + FemuCsdProgram *program = opaque; + + if (!program) { + return; + } + + csd_program_unload(program); + g_free(program->data); + g_free(program); +} + +static void csd_mrs_free(gpointer opaque) +{ + FemuCsdMrs *mrs = opaque; + + if (!mrs) { + return; + } + + g_free(mrs->ranges); + g_free(mrs); +} + +static void csd_init_ctrl_str(FemuCtrl *n) +{ + static int csd_id; + const char *mn = "FEMU Computational Storage Controller"; + const char *sn = "vCSD"; + + nvme_set_ctrl_name(n, mn, sn, &csd_id); +} + +static void csd_init(FemuCtrl *n, Error **errp) +{ + FemuCsdState *csd; + struct ssd *ssd; + + csd_check_size(); + + if (n->csd_params.fdm_size_mb == 0) { + error_setg(errp, "CSD mode requires fdm_size to be non-zero"); + return; + } + + if (n->csd_params.fdm_size_mb > UINT64_MAX / MiB) { + error_setg(errp, "CSD fdm_size is too large"); + return; + } + + if (n->csd_params.nr_cu == 0 || n->csd_params.nr_cu > 64) { + error_setg(errp, "CSD nr_cu must be in range [1, 64]"); + return; + } + + if (n->csd_params.nr_thread == 0) { + error_setg(errp, "CSD nr_thread must be non-zero"); + return; + } + + if (n->csd_params.csf_runtime_scale == 0) { + error_setg(errp, "CSD csf_runtime_scale must be non-zero"); + return; + } + + csd_init_ctrl_str(n); + + ssd = n->ssd = g_malloc0(sizeof(*ssd)); + 
ssd->dataplane_started_ptr = &n->dataplane_started; + ssd->ssdname = (char *)n->devname; + ssd_init(n); + + csd = g_new0(FemuCsdState, 1); + csd->params = n->csd_params; + csd->fdm_capacity = n->csd_params.fdm_size_mb * MiB; + csd->next_afdm_id = 1; + csd->next_group_id = 1; + csd->next_rsid = 1; + csd->afdms = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, + csd_afdm_free); + csd->programs = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, + csd_program_free); + csd->groups = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, + g_free); + csd->mrs = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, + csd_mrs_free); + qemu_mutex_init(&csd->lock); + n->ext_ops.state = csd; + + femu_log("%s,CSD mode initialized: fdm=%" PRIu64 "MB, " + "nr_cu=%u, nr_thread=%u\n", + n->devname, csd->params.fdm_size_mb, csd->params.nr_cu, + csd->params.nr_thread); +} + +static void csd_exit(FemuCtrl *n) +{ + FemuCsdState *csd = csd_state(n); + + if (!csd) { + return; + } + + g_hash_table_destroy(csd->afdms); + g_hash_table_destroy(csd->programs); + g_hash_table_destroy(csd->groups); + g_hash_table_destroy(csd->mrs); + qemu_mutex_destroy(&csd->lock); + g_free(csd); + n->ext_ops.state = NULL; +} + +static FemuCsdProgram *csd_get_program_locked(FemuCsdState *csd, uint32_t id) +{ + if (id == 0) { + return NULL; + } + + return g_hash_table_lookup(csd->programs, GUINT_TO_POINTER(id)); +} + +static FemuCsdAfdm *csd_get_afdm_locked(FemuCsdState *csd, uint32_t id) +{ + if (id == 0) { + return NULL; + } + + return g_hash_table_lookup(csd->afdms, GUINT_TO_POINTER(id)); +} + +static FemuCsdGroup *csd_get_group_locked(FemuCsdState *csd, uint32_t id) +{ + if (id == 0) { + return NULL; + } + + return g_hash_table_lookup(csd->groups, GUINT_TO_POINTER(id)); +} + +static FemuCsdMrs *csd_get_mrs_locked(FemuCsdState *csd, uint32_t id) +{ + if (id == 0) { + return NULL; + } + + return g_hash_table_lookup(csd->mrs, GUINT_TO_POINTER(id)); +} + +static uint16_t 
csd_check_afdm_range(FemuCsdAfdm *afdm, uint64_t offset, + uint64_t size) +{ + if (!afdm || size == 0 || offset > afdm->size || + size > afdm->size - offset) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (size > UINT32_MAX) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static uint16_t csd_parse_program(FemuCsdProgram *program, const char **path, + const char **symbol) +{ + char *name; + size_t path_len; + size_t symbol_len; + + if (!program->data || program->size < 3) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + name = memchr(program->data, '\0', program->size); + if (!name || name == (char *)program->data) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + path_len = name - (char *)program->data; + if (path_len + 1 >= program->size) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + *path = (const char *)program->data; + *symbol = name + 1; + symbol_len = strnlen(*symbol, program->size - path_len - 1); + if (symbol_len == 0 || path_len + symbol_len + 2 > program->size) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static uint16_t csd_check_nvm_ftl_range(FemuCtrl *n, uint64_t slba, + uint64_t nlb, uint64_t *mapped_pages) +{ + struct ssd *ssd = n->ssd; + struct ssdparams *spp; + uint64_t start_lpn; + uint64_t end_lpn; + + if (!ssd || !ssd->maptbl) { + return NVME_INTERNAL_DEV_ERROR | NVME_DNR; + } + + spp = &ssd->sp; + if (spp->secs_per_pg <= 0 || spp->tt_pgs == 0 || nlb == 0) { + return NVME_INTERNAL_DEV_ERROR | NVME_DNR; + } + + start_lpn = slba / spp->secs_per_pg; + if (slba > UINT64_MAX - nlb + 1) { + return NVME_LBA_RANGE | NVME_DNR; + } + + end_lpn = (slba + nlb - 1) / spp->secs_per_pg; + if (end_lpn >= spp->tt_pgs) { + return NVME_LBA_RANGE | NVME_DNR; + } + + *mapped_pages = 0; + for (uint64_t lpn = start_lpn; lpn <= end_lpn; lpn++) { + if (ssd->maptbl[lpn].ppa != UNMAPPED_PPA) { + (*mapped_pages)++; + } + } + + return NVME_SUCCESS; +} + +static uint16_t 
csd_load_shared_lib(FemuCsdProgram *program) +{ + const char *path; + const char *symbol; + gpointer fn = NULL; + uint16_t status; + + status = csd_parse_program(program, &path, &symbol); + if (status) { + return status; + } + + program->module = g_module_open(path, G_MODULE_BIND_LOCAL); + if (!program->module) { + femu_err("CSD: failed to load shared library %s: %s\n", path, + g_module_error()); + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (!g_module_symbol(program->module, symbol, &fn) || !fn) { + femu_err("CSD: failed to find shared library symbol %s: %s\n", symbol, + g_module_error()); + csd_program_unload(program); + return NVME_INVALID_FIELD | NVME_DNR; + } + + program->shared_lib_fn = (FemuCsdSharedLibFn)fn; + return NVME_SUCCESS; +} + +static uint16_t csd_load_ubpf(FemuCsdProgram *program, bool jit) +{ +#ifdef CONFIG_FEMU_CSD_UBPF + const char *path; + const char *symbol; + g_autofree char *elf = NULL; + gsize elf_size = 0; + g_autoptr(GError) err = NULL; + char *errmsg = NULL; + uint16_t status; + + status = csd_parse_program(program, &path, &symbol); + if (status) { + return status; + } + + if (!g_file_get_contents(path, &elf, &elf_size, &err)) { + femu_err("CSD: failed to read uBPF program %s: %s\n", path, + err ? err->message : "unknown error"); + return NVME_INVALID_FIELD | NVME_DNR; + } + + program->ubpf_vm = ubpf_create(); + if (!program->ubpf_vm) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (ubpf_load_elf_ex(program->ubpf_vm, elf, elf_size, symbol, &errmsg) < 0) { + femu_err("CSD: failed to load uBPF ELF %s:%s: %s\n", path, symbol, + errmsg ? errmsg : "unknown error"); + free(errmsg); + csd_program_unload(program); + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (jit) { + program->ubpf_jit_fn = ubpf_compile(program->ubpf_vm, &errmsg); + if (!program->ubpf_jit_fn) { + femu_err("CSD: failed to JIT uBPF ELF %s:%s: %s\n", path, symbol, + errmsg ? 
errmsg : "unknown error"); + free(errmsg); + csd_program_unload(program); + return NVME_INVALID_FIELD | NVME_DNR; + } + } + + return NVME_SUCCESS; +#else + return NVME_INVALID_FIELD | NVME_DNR; +#endif +} + +static uint16_t csd_load_program_data(FemuCsdProgram *program, bool jit) +{ + switch (program->type) { + case NVME_CSD_CSF_TYPE_PHANTOM: + return NVME_SUCCESS; + case NVME_CSD_CSF_TYPE_SHARED_LIB: + return csd_load_shared_lib(program); + case NVME_CSD_CSF_TYPE_EBPF: + return csd_load_ubpf(program, jit); + default: + return NVME_INVALID_FIELD | NVME_DNR; + } +} + +static uint16_t csd_compute_load(FemuCtrl *n, NvmeCmd *cmd) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdLoadProgramCmd *load = (NvmeCsdLoadProgramCmd *)cmd; + uint16_t pind = le16_to_cpu(load->pind); + uint32_t psize = le32_to_cpu(load->psize); + uint32_t numb = le32_to_cpu(load->numb); + uint32_t loff = le32_to_cpu(load->loff); + uint64_t pid = le64_to_cpu(load->pid); + uint64_t prp1 = le64_to_cpu(load->prp1); + uint64_t prp2 = le64_to_cpu(load->prp2); + FemuCsdProgram *program; + uint16_t status = NVME_SUCCESS; + + if (pind == 0 || psize > UINT32_MAX || loff > psize || + numb > psize - loff) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + qemu_mutex_lock(&csd->lock); + + if (load->sel) { + program = csd_get_program_locked(csd, pind); + if (!program) { + qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } + if (program->active) { + qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } + g_hash_table_remove(csd->programs, GUINT_TO_POINTER((uint32_t)pind)); + qemu_mutex_unlock(&csd->lock); + return NVME_SUCCESS; + } + + if (loff == 0) { + program = g_new0(FemuCsdProgram, 1); + program->id = pind; + program->type = load->ptype; + program->runtime = le32_to_cpu(load->runtime); + program->runtime_scale = le16_to_cpu(load->runtime_scale); + program->size = psize; + program->pid = pid; + program->indirect = load->indirect; + program->loading = true; + if 
(psize) {
+            program->data = g_malloc0(psize);
+        }
+        g_hash_table_replace(csd->programs, GUINT_TO_POINTER((uint32_t)pind),
+                             program);
+    } else {
+        /*
+         * Continuation chunk: it must target a program that is still being
+         * loaded and must describe the same program as the first chunk.
+         * Accepting chunks for an already finalized program would run
+         * csd_load_program_data() a second time, which overwrites (and so
+         * leaks) the open module handle and silently clears 'active'.
+         */
+        program = csd_get_program_locked(csd, pind);
+        if (!program || !program->loading || program->size != psize ||
+            (load->pit == 1 && program->pid != pid) ||
+            program->type != load->ptype) {
+            qemu_mutex_unlock(&csd->lock);
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+    }
+
+    /*
+     * Never count more bytes than the declared size: if load_size went past
+     * size, the equality test below could never succeed and the program
+     * would stay in the loading state forever.
+     */
+    if (numb > program->size - program->load_size) {
+        qemu_mutex_unlock(&csd->lock);
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    if (numb) {
+        status = dma_write_prp(n, program->data + loff, numb, prp1, prp2);
+        if (status) {
+            qemu_mutex_unlock(&csd->lock);
+            return status | NVME_DNR;
+        }
+        program->load_size += numb;
+    }
+
+    /* All bytes received: set the program up; it starts out inactive. */
+    if (program->load_size == program->size) {
+        status = csd_load_program_data(program, load->jit);
+        if (!status) {
+            program->loading = false;
+            program->active = false;
+        }
+    }
+
+    qemu_mutex_unlock(&csd->lock);
+    return status;
+}
+
+/*
+ * Activate (sel == 1) or deactivate (sel == 0) a fully loaded program.
+ * Returns NVME_INVALID_FIELD | NVME_DNR for program index 0, an unknown or
+ * still-loading program, or any other sel value.
+ */
+static uint16_t csd_compute_activate(FemuCtrl *n, NvmeCmd *cmd)
+{
+    FemuCsdState *csd = csd_state(n);
+    NvmeCsdProgramActivationCmd *activation =
+        (NvmeCsdProgramActivationCmd *)cmd;
+    /*
+     * NOTE(review): csd_compute_load() reads pind through le16_to_cpu();
+     * here it is read raw - confirm the field width in csd.h.
+     */
+    uint16_t pind = activation->pind;
+    uint8_t sel = activation->sel;
+    FemuCsdProgram *program;
+
+    if (pind == 0) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    qemu_mutex_lock(&csd->lock);
+    program = csd_get_program_locked(csd, pind);
+    if (!program || program->loading) {
+        qemu_mutex_unlock(&csd->lock);
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    switch (sel) {
+    case 0:
+        program->active = false;
+        break;
+    case 1:
+        program->active = true;
+        break;
+    default:
+        qemu_mutex_unlock(&csd->lock);
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    qemu_mutex_unlock(&csd->lock);
+    return NVME_SUCCESS;
+}
+
+/*
+ * Memory range set management: sel == 0 stores a new set read from the
+ * host buffer and returns its id in the completion entry, sel == 1 deletes
+ * the set named by rsid.
+ */
+static uint16_t csd_mrs_mgmt(FemuCtrl *n, NvmeCmd *cmd, NvmeCqe *cqe)
+{
+    FemuCsdState *csd = csd_state(n);
+    NvmeCsdMrsMgmtCmd *manage = (NvmeCsdMrsMgmtCmd *)cmd;
+    uint16_t rsid = le16_to_cpu(manage->rsid);
+    uint32_t sel = manage->sel;
+    /* NOTE(review): numr is read without a byte-order conversion - confirm its width in csd.h. */
+    uint32_t numr = manage->numr;
+    uint64_t prp1 = le64_to_cpu(manage->prp1);
+    uint64_t prp2 = le64_to_cpu(manage->prp2);
+    NvmeCsdMemoryRange *ranges = NULL;
+    FemuCsdMrs *mrs;
+    uint32_t id;
+    uint16_t status = NVME_SUCCESS;
+
+    switch (sel) {
+    case 0:
+        /* Create: rsid must be 0 and the range count must be in bounds. */
+        if (rsid != 0 || numr == 0 ||
+            numr > CSD_EXEC_DATA_MAX / sizeof(NvmeCsdMemoryRange)) {
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+
+        /* Copy the range descriptors out of guest memory before locking. */
+        ranges = g_new0(NvmeCsdMemoryRange, numr);
+        status = dma_write_prp(n, (uint8_t *)ranges,
+                               numr * sizeof(*ranges), prp1, prp2);
+        if (status) {
+            g_free(ranges);
+            return status;
+        }
+
+        qemu_mutex_lock(&csd->lock);
+        /*
+         * Commands carry the range set id in a 16-bit field and FemuCsdMrs
+         * stores it as uint16_t, so only ids 1..UINT16_MAX are usable. An id
+         * above that could never be referenced or deleted again. Fail
+         * explicitly when every id is taken; this also guarantees that the
+         * search below terminates.
+         */
+        if (g_hash_table_size(csd->mrs) >= UINT16_MAX) {
+            qemu_mutex_unlock(&csd->lock);
+            g_free(ranges);
+            return NVME_CAP_EXCEEDED | NVME_DNR;
+        }
+        do {
+            /* Wrap the counter back to 1; id 0 means "no range set". */
+            if (csd->next_rsid == 0 || csd->next_rsid > UINT16_MAX) {
+                csd->next_rsid = 1;
+            }
+            id = csd->next_rsid++;
+        } while (g_hash_table_contains(csd->mrs, GUINT_TO_POINTER(id)));
+
+        /* The table owns mrs and ranges from here on (see csd_mrs_free). */
+        mrs = g_new0(FemuCsdMrs, 1);
+        mrs->rsid = id;
+        mrs->numr = numr;
+        mrs->ranges = ranges;
+        g_hash_table_insert(csd->mrs, GUINT_TO_POINTER(id), mrs);
+        qemu_mutex_unlock(&csd->lock);
+
+        cqe->n.result = id;
+        return NVME_SUCCESS;
+
+    case 1:
+        /* Delete: the set must exist. */
+        if (rsid == 0) {
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+
+        qemu_mutex_lock(&csd->lock);
+        if (!csd_get_mrs_locked(csd, rsid)) {
+            qemu_mutex_unlock(&csd->lock);
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+        g_hash_table_remove(csd->mrs, GUINT_TO_POINTER((uint32_t)rsid));
+        qemu_mutex_unlock(&csd->lock);
+        cqe->n.result = 0;
+        return NVME_SUCCESS;
+
+    default:
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+}
+
+/*
+ * Resolve numr memory ranges into host pointers and lengths for a program
+ * invocation. Must be called with csd->lock held. On success the two
+ * newly allocated arrays are stored in args and returned through
+ * mr_addrp / mr_lenp; on failure they are freed here and an NVMe status
+ * is returned.
+ */
+static uint16_t csd_build_exec_args_locked(FemuCsdState *csd,
+                                           NvmeCsdMemoryRange *ranges,
+                                           uint32_t numr,
+                                           FemuCsdArgs *args,
+                                           void ***mr_addrp,
+                                           long long **mr_lenp)
+{
+    void **mr_addr = g_new0(void *, numr);
+    long long *mr_len = g_new0(long long, numr);
+
+    for (uint32_t i = 0; i < numr; i++) {
+        uint32_t nsid = le32_to_cpu(ranges[i].nsid);
+        uint32_t len = le32_to_cpu(ranges[i].len);
+        uint64_t sb = le64_to_cpu(ranges[i].sb);
+        FemuCsdAfdm *afdm;
+
+        /* Only ranges that point into an AFDM are supported. */
+        if (nsid != NVME_CSD_MR_AFDM_NSID) {
+            g_free(mr_addr);
+            g_free(mr_len);
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+
+ afdm = csd_get_afdm_locked(csd, sb); + if (!afdm) { + g_free(mr_addr); + g_free(mr_len); + return NVME_INVALID_FIELD | NVME_DNR; + } + if (len == 0) { + len = afdm->size > UINT32_MAX ? UINT32_MAX : afdm->size; + } + if (len > afdm->size) { + g_free(mr_addr); + g_free(mr_len); + return NVME_INVALID_FIELD | NVME_DNR; + } + + mr_addr[i] = afdm->data; + mr_len[i] = len; + } + + args->numr = numr; + args->mr_addr = mr_addr; + args->mr_len = mr_len; + *mr_addrp = mr_addr; + *mr_lenp = mr_len; + + return NVME_SUCCESS; +} + +static uint16_t csd_exec(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdExecCmd *exec = (NvmeCsdExecCmd *)cmd; + uint16_t pind = le16_to_cpu(exec->pind); + uint16_t rsid = le16_to_cpu(exec->rsid); + uint32_t numr = le32_to_cpu(exec->numr); + uint32_t dlen = le32_to_cpu(exec->dlen); + uint64_t cparam1 = le64_to_cpu(exec->cparam1); + uint64_t cparam2 = le64_to_cpu(exec->cparam2); + uint32_t group_id = exec->group; + uint32_t runtime = le32_to_cpu(exec->runtime); + uint64_t prp1 = le64_to_cpu(exec->prp1); + uint64_t prp2 = le64_to_cpu(exec->prp2); + FemuCsdProgram *program; + FemuCsdMrs *mrs = NULL; + uint64_t copy_size; + uint8_t *data = NULL; + NvmeCsdMemoryRange *ranges = NULL; + void **mr_addr = NULL; + long long *mr_len = NULL; + FemuCsdArgs args = { 0 }; + int64_t result = 0; + uint16_t status = NVME_SUCCESS; + + if (dlen == 0 && numr > 0) { + dlen = numr * sizeof(NvmeCsdMemoryRange); + } + + if (pind == 0 || (rsid == 0 && numr == 0) || + numr > CSD_EXEC_DATA_MAX / sizeof(NvmeCsdMemoryRange)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + if (rsid != 0 && numr != 0) { + return NVME_INVALID_FIELD | NVME_DNR; + } + if (dlen < numr * sizeof(NvmeCsdMemoryRange) || dlen > CSD_EXEC_DATA_MAX) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (numr) { + data = g_malloc0(dlen); + status = dma_write_prp(n, data, dlen, prp1, prp2); + if (status) { + g_free(data); + return status; + } + ranges = 
(NvmeCsdMemoryRange *)data;
+    }
+
+    qemu_mutex_lock(&csd->lock);
+    /* The program must exist and have been activated. */
+    program = csd_get_program_locked(csd, pind);
+    if (!program) {
+        qemu_mutex_unlock(&csd->lock);
+        status = NVME_INVALID_FIELD | NVME_DNR;
+        goto out;
+    }
+    if (!program->active) {
+        qemu_mutex_unlock(&csd->lock);
+        status = NVME_INVALID_FIELD | NVME_DNR;
+        goto out;
+    }
+
+    /* Group 0 means "no group"; any other id must name an existing group. */
+    if (group_id != 0 && !csd_get_group_locked(csd, group_id)) {
+        qemu_mutex_unlock(&csd->lock);
+        status = NVME_INVALID_FIELD | NVME_DNR;
+        goto out;
+    }
+
+    /* Fall back to the runtime recorded when the program was loaded. */
+    if (runtime == 0) {
+        runtime = program->runtime;
+    }
+
+    /* A stored range set replaces the inline ranges; no inline data then. */
+    if (rsid) {
+        mrs = csd_get_mrs_locked(csd, rsid);
+        if (!mrs) {
+            qemu_mutex_unlock(&csd->lock);
+            status = NVME_INVALID_FIELD | NVME_DNR;
+            goto out;
+        }
+        ranges = mrs->ranges;
+        numr = mrs->numr;
+        dlen = 0;
+    }
+
+    status = csd_build_exec_args_locked(csd, ranges, numr, &args,
+                                        &mr_addr, &mr_len);
+    if (status) {
+        qemu_mutex_unlock(&csd->lock);
+        goto out;
+    }
+    args.cparam1 = cparam1;
+    args.cparam2 = cparam2;
+    /* Bytes after the inline range descriptors are passed as the data buffer. */
+    args.data_buffer = data && dlen > numr * sizeof(NvmeCsdMemoryRange) ?
+        data + numr * sizeof(NvmeCsdMemoryRange) : NULL;
+    args.buffer_len = args.data_buffer ?
+        dlen - numr * sizeof(NvmeCsdMemoryRange) : 0;
+
+    switch (program->type) {
+    case NVME_CSD_CSF_TYPE_PHANTOM:
+        if (args.numr >= 2) {
+            copy_size = MIN(args.mr_len[0], args.mr_len[1]);
+            /*
+             * Both ranges may name the same AFDM, in which case source and
+             * destination are the same buffer; memcpy() is undefined for
+             * overlapping objects, so use memmove().
+             */
+            memmove(args.mr_addr[0], args.mr_addr[1], copy_size);
+            result = copy_size > INT64_MAX ?
INT64_MAX : copy_size; + } + break; + case NVME_CSD_CSF_TYPE_SHARED_LIB: + if (!program->shared_lib_fn) { + status = NVME_INVALID_FIELD | NVME_DNR; + break; + } + result = program->shared_lib_fn(&args); + break; + case NVME_CSD_CSF_TYPE_EBPF: +#ifdef CONFIG_FEMU_CSD_UBPF + if (!program->ubpf_vm) { + status = NVME_INVALID_FIELD | NVME_DNR; + break; + } + if (program->ubpf_jit_fn) { + result = program->ubpf_jit_fn((struct ubpf_jit_args *)&args); + } else { + uint64_t ubpf_result; + + if (ubpf_exec(program->ubpf_vm, (struct ubpf_jit_args *)&args, + &ubpf_result) < 0) { + status = NVME_INVALID_FIELD | NVME_DNR; + break; + } + result = ubpf_result; + } +#else + status = NVME_INVALID_FIELD | NVME_DNR; +#endif + break; + default: + status = NVME_INVALID_FIELD | NVME_DNR; + break; + } + if (!status) { + req->cqe.n.result = result > UINT32_MAX ? UINT32_MAX : result; + } + g_free(mr_addr); + g_free(mr_len); + qemu_mutex_unlock(&csd->lock); + + if (status) { + goto out; + } + + if (runtime) { + req->reqlat += runtime; + req->expire_time += runtime; + } + +out: + g_free(data); + return status; +} + +static uint16_t csd_normalize_prio(int8_t *prio) +{ + if (*prio == 0) { + *prio = 5; + } + + if (*prio < 1 || *prio > 9) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static uint16_t csd_create_group(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdCreateGroupCmd *create = (NvmeCsdCreateGroupCmd *)cmd; + FemuCsdGroup *group; + uint32_t id; + int8_t prio = create->prio; + uint16_t status; + + status = csd_normalize_prio(&prio); + if (status) { + return status; + } + + group = g_new0(FemuCsdGroup, 1); + group->prio = prio; + group->qos_flags = create->qos_flags; + group->bandwidth = le32_to_cpu(create->bandwidth); + group->deadline = le32_to_cpu(create->deadline); + + qemu_mutex_lock(&csd->lock); + id = csd->next_group_id++; + if (id == 0) { + csd->next_group_id = 1; + id = csd->next_group_id++; + } + 
group->id = id; + g_hash_table_insert(csd->groups, GUINT_TO_POINTER(id), group); + qemu_mutex_unlock(&csd->lock); + + req->cqe.n.result = id; + return NVME_SUCCESS; +} + +static uint16_t csd_set_qos(FemuCtrl *n, NvmeCmd *cmd) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdSetQosCmd *set = (NvmeCsdSetQosCmd *)cmd; + uint32_t id = le32_to_cpu(set->id); + int8_t prio = set->prio; + FemuCsdGroup *group; + uint16_t status; + + status = csd_normalize_prio(&prio); + if (status) { + return status; + } + + qemu_mutex_lock(&csd->lock); + group = csd_get_group_locked(csd, id); + if (!group) { + qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } + + group->prio = prio; + group->qos_flags = set->qos_flags; + group->bandwidth = le32_to_cpu(set->bandwidth); + group->deadline = le32_to_cpu(set->deadline); + qemu_mutex_unlock(&csd->lock); + + return NVME_SUCCESS; +} + +static uint16_t csd_delete_group(FemuCtrl *n, NvmeCmd *cmd) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdDeleteGroupCmd *delete = (NvmeCsdDeleteGroupCmd *)cmd; + uint32_t id = le32_to_cpu(delete->id); + + qemu_mutex_lock(&csd->lock); + if (!csd_get_group_locked(csd, id)) { + qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } + + g_hash_table_remove(csd->groups, GUINT_TO_POINTER(id)); + qemu_mutex_unlock(&csd->lock); + + return NVME_SUCCESS; +} + +static uint16_t csd_alloc_fdm(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdAllocFdmCmd *alloc = (NvmeCsdAllocFdmCmd *)cmd; + FemuCsdAfdm *afdm; + uint64_t size = le64_to_cpu(alloc->size); + uint32_t id; + + if (alloc->type != NVME_CSD_FDM_TYPE_HOST || size == 0) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + qemu_mutex_lock(&csd->lock); + if (size > csd->fdm_capacity - csd->fdm_used) { + qemu_mutex_unlock(&csd->lock); + return NVME_CAP_EXCEEDED | NVME_DNR; + } + + id = csd->next_afdm_id++; + if (id == 0) { + csd->next_afdm_id = 1; + id = csd->next_afdm_id++; + } + + afdm = 
g_new0(FemuCsdAfdm, 1); + afdm->id = id; + afdm->size = size; + afdm->data = g_malloc0(size); + + g_hash_table_insert(csd->afdms, GUINT_TO_POINTER(id), afdm); + csd->fdm_used += size; + qemu_mutex_unlock(&csd->lock); + + req->cqe.n.result = id; + return NVME_SUCCESS; +} + +static uint16_t csd_dealloc_afdm(FemuCtrl *n, NvmeCmd *cmd) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdDeallocAfdmCmd *dealloc = (NvmeCsdDeallocAfdmCmd *)cmd; + uint32_t id = le32_to_cpu(dealloc->id); + FemuCsdAfdm *afdm; + + qemu_mutex_lock(&csd->lock); + afdm = csd_get_afdm_locked(csd, id); + if (!afdm) { + qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } + + csd->fdm_used -= afdm->size; + g_hash_table_remove(csd->afdms, GUINT_TO_POINTER(id)); + qemu_mutex_unlock(&csd->lock); + + return NVME_SUCCESS; +} + +static uint16_t csd_read_afdm(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdReadAfdmCmd *read = (NvmeCsdReadAfdmCmd *)cmd; + uint32_t id = le32_to_cpu(read->id); + uint64_t offset = le64_to_cpu(read->offset); + uint64_t size = le64_to_cpu(read->size); + uint64_t prp1 = le64_to_cpu(read->prp1); + uint64_t prp2 = le64_to_cpu(read->prp2); + FemuCsdAfdm *afdm; + uint16_t status; + + qemu_mutex_lock(&csd->lock); + afdm = csd_get_afdm_locked(csd, id); + status = csd_check_afdm_range(afdm, offset, size); + if (!status) { + status = dma_read_prp(n, afdm->data + offset, size, prp1, prp2); + } + qemu_mutex_unlock(&csd->lock); + + if (status) { + return status | NVME_DNR; + } + + req->cqe.n.result = size; + return NVME_SUCCESS; +} + +static uint16_t csd_write_afdm(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdWriteAfdmCmd *write = (NvmeCsdWriteAfdmCmd *)cmd; + uint32_t id = le32_to_cpu(write->id); + uint64_t offset = le64_to_cpu(write->offset); + uint64_t size = le64_to_cpu(write->size); + uint64_t prp1 = le64_to_cpu(write->prp1); + uint64_t prp2 = le64_to_cpu(write->prp2); + 
FemuCsdAfdm *afdm; + uint16_t status; + + qemu_mutex_lock(&csd->lock); + afdm = csd_get_afdm_locked(csd, id); + status = csd_check_afdm_range(afdm, offset, size); + if (!status) { + status = dma_write_prp(n, afdm->data + offset, size, prp1, prp2); + } + qemu_mutex_unlock(&csd->lock); + + if (status) { + return status | NVME_DNR; + } + + req->cqe.n.result = size; + return NVME_SUCCESS; +} + +static uint16_t csd_nvm_to_afdm(FemuCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, + NvmeRequest *req) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdNvmToAfdmCmd *copy = (NvmeCsdNvmToAfdmCmd *)cmd; + const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); + const uint8_t data_shift = ns->id_ns.lbaf[lba_index].lbads; + uint32_t id = le32_to_cpu(copy->id); + uint64_t offset = le64_to_cpu(copy->offset); + uint64_t slba = le64_to_cpu(copy->slba); + uint64_t nlb = le16_to_cpu(copy->nlb) + 1; + uint64_t size = nlb << data_shift; + uint64_t nvm_offset = slba << data_shift; + uint64_t mapped_pages; + FemuCsdAfdm *afdm; + uint16_t status; + + if (slba > le64_to_cpu(ns->id_ns.nsze) || + nlb > le64_to_cpu(ns->id_ns.nsze) - slba || + nvm_offset > n->mbe->size || size > n->mbe->size - nvm_offset) { + return NVME_LBA_RANGE | NVME_DNR; + } + + status = csd_check_nvm_ftl_range(n, slba, nlb, &mapped_pages); + if (status) { + return status; + } + + qemu_mutex_lock(&csd->lock); + afdm = csd_get_afdm_locked(csd, id); + status = csd_check_afdm_range(afdm, offset, size); + if (!status) { + memcpy(afdm->data + offset, + (uint8_t *)n->mbe->logical_space + nvm_offset, size); + } + qemu_mutex_unlock(&csd->lock); + + if (status) { + return status; + } + + req->cqe.n.result = size; + if (mapped_pages) { + req->reqlat += n->ssd->sp.pg_rd_lat; + req->expire_time += n->ssd->sp.pg_rd_lat; + } + return NVME_SUCCESS; +} + +static uint16_t csd_io_cmd(FemuCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, + NvmeRequest *req) +{ + switch (cmd->opcode) { + case NVME_CMD_READ: + case NVME_CMD_WRITE: + return 
nvme_rw(n, ns, cmd, req); + case NVME_CMD_CSD_ALLOC_FDM: + return csd_alloc_fdm(n, cmd, req); + case NVME_CMD_CSD_DEALLOC_AFDM: + return csd_dealloc_afdm(n, cmd); + case NVME_CMD_CSD_NVM_TO_AFDM: + return csd_nvm_to_afdm(n, ns, cmd, req); + case NVME_CMD_CSD_EXEC: + return csd_exec(n, cmd, req); + case NVME_CMD_CSD_READ_AFDM: + return csd_read_afdm(n, cmd, req); + case NVME_CMD_CSD_WRITE_AFDM: + return csd_write_afdm(n, cmd, req); + case NVME_CMD_CSD_CREATE_GROUP: + return csd_create_group(n, cmd, req); + case NVME_CMD_CSD_SET_QOS: + return csd_set_qos(n, cmd); + case NVME_CMD_CSD_DELETE_GROUP: + return csd_delete_group(n, cmd); + default: + return NVME_INVALID_OPCODE | NVME_DNR; + } +} + +static uint16_t csd_admin_cmd(FemuCtrl *n, NvmeCmd *cmd, NvmeCqe *cqe) +{ + switch (cmd->opcode) { + case NVME_ADM_CMD_CSD_MRS_MGMT: + return csd_mrs_mgmt(n, cmd, cqe); + case NVME_ADM_CMD_CSD_COMPUTE_LOAD: + case NVME_ADM_CMD_CSD_COMPUTE_LOAD_DATA: + return csd_compute_load(n, cmd); + case NVME_ADM_CMD_CSD_COMPUTE_ACTIVATE: + return csd_compute_activate(n, cmd); + default: + return NVME_INVALID_OPCODE | NVME_DNR; + } +} + +int nvme_register_csd(FemuCtrl *n) +{ + n->ext_ops = (FemuExtCtrlOps) { + .state = NULL, + .init = csd_init, + .exit = csd_exit, + .rw_check_req = NULL, + .start_ctrl = NULL, + .admin_cmd = NULL, + .admin_cmd_cqe = csd_admin_cmd, + .io_cmd = csd_io_cmd, + .get_log = NULL, + }; + + return 0; +} diff --git a/hw/femu/csd/csd.h b/hw/femu/csd/csd.h new file mode 100644 index 00000000000..91a17c21ecf --- /dev/null +++ b/hw/femu/csd/csd.h @@ -0,0 +1,244 @@ +#ifndef FEMU_CSD_H +#define FEMU_CSD_H + +#include "../nvme.h" + +enum FemuCsdIoCommands { + NVME_CMD_CSD_ALLOC_FDM = 0xb0, + NVME_CMD_CSD_DEALLOC_AFDM = 0xc0, + NVME_CMD_CSD_NVM_TO_AFDM = 0xd0, + NVME_CMD_CSD_EXEC = 0xe1, + NVME_CMD_CSD_READ_AFDM = 0xf2, + NVME_CMD_CSD_WRITE_AFDM = 0xf5, + NVME_CMD_CSD_CREATE_GROUP = 0xf6, + NVME_CMD_CSD_SET_QOS = 0xf7, + NVME_CMD_CSD_DELETE_GROUP = 0xf8, +}; + +enum 
FemuCsdAdminCommands { + NVME_ADM_CMD_CSD_MRS_MGMT = 0x21, + NVME_ADM_CMD_CSD_COMPUTE_LOAD = 0x22, + NVME_ADM_CMD_CSD_COMPUTE_ACTIVATE = 0x23, + NVME_ADM_CMD_CSD_COMPUTE_LOAD_DATA = 0x25, +}; + +#define NVME_CSD_MR_AFDM_NSID 0 + +enum FemuCsdFdmType { + NVME_CSD_FDM_TYPE_HOST = 0, +}; + +enum FemuCsdCsfType { + NVME_CSD_CSF_TYPE_PHANTOM = 0, + NVME_CSD_CSF_TYPE_EBPF = 1, + NVME_CSD_CSF_TYPE_BITSTREAM = 2, + NVME_CSD_CSF_TYPE_SHARED_LIB = 3, +}; + +typedef struct QEMU_PACKED NvmeCsdLoadProgramCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint16_t jit:1; + uint16_t rsvd_ctrl:15; + uint16_t runtime_scale; + uint32_t runtime; + uint32_t rsvd4[2]; + uint64_t prp1; + uint64_t prp2; + uint16_t pind; + uint8_t ptype; + uint8_t sel:1; + uint8_t pit:3; + uint8_t indirect:1; + uint8_t rsvd10:3; + uint32_t psize; + uint64_t pid; + uint32_t numb; + uint32_t loff; +} NvmeCsdLoadProgramCmd; + +typedef struct QEMU_PACKED NvmeCsdProgramActivationCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint32_t rsvd[4]; + uint64_t prp1; + uint64_t prp2; + uint32_t pind:16; + uint32_t sel:4; + uint32_t rsvd10:12; + uint32_t runtime; + uint32_t rsvd12[4]; +} NvmeCsdProgramActivationCmd; + +typedef struct QEMU_PACKED NvmeCsdAllocFdmCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint64_t size; + uint8_t type; + uint8_t rsvd14[7]; + uint64_t rsvd15; +} NvmeCsdAllocFdmCmd; + +typedef struct QEMU_PACKED NvmeCsdDeallocAfdmCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint32_t id; + uint32_t rsvd11[5]; +} NvmeCsdDeallocAfdmCmd; + +typedef struct QEMU_PACKED NvmeCsdNvmToAfdmCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint64_t slba; + uint16_t nlb; + uint16_t rsvd12; + uint32_t id; + uint64_t 
offset; +} NvmeCsdNvmToAfdmCmd; + +typedef struct QEMU_PACKED NvmeCsdExecCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint16_t pind; + uint16_t rsid; + uint32_t numr; + uint32_t dlen; + uint32_t rsvd4; + uint64_t prp1; + uint64_t prp2; + uint64_t cparam1; + uint64_t cparam2; + uint32_t group:8; + uint32_t chunk_nlb:24; + uint32_t runtime; +} NvmeCsdExecCmd; + +typedef struct QEMU_PACKED NvmeCsdMemoryRange { + uint32_t nsid; + uint32_t len; + uint64_t sb; + uint64_t rsvd[2]; +} NvmeCsdMemoryRange; + +typedef struct FemuCsdArgs { + int numr; + void **mr_addr; + long long *mr_len; + long long cparam1; + long long cparam2; + void *data_buffer; + long long buffer_len; +} QEMU_PACKED FemuCsdArgs; + +typedef struct QEMU_PACKED NvmeCsdReadAfdmCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint64_t offset; + uint64_t size; + uint32_t id; + uint32_t rsvd15; +} NvmeCsdReadAfdmCmd; + +typedef struct QEMU_PACKED NvmeCsdWriteAfdmCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint64_t offset; + uint64_t size; + uint32_t id; + uint32_t rsvd15; +} NvmeCsdWriteAfdmCmd; + +typedef struct QEMU_PACKED NvmeCsdCreateGroupCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + int8_t prio; + uint8_t qos_flags; + uint16_t rsvd10; + uint32_t bandwidth; + uint32_t deadline; + uint32_t rsvd14[3]; +} NvmeCsdCreateGroupCmd; + +typedef struct QEMU_PACKED NvmeCsdSetQosCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + int8_t prio; + uint8_t qos_flags; + uint16_t rsvd10; + uint32_t bandwidth; + uint32_t deadline; + uint32_t id; + uint32_t rsvd15[2]; +} NvmeCsdSetQosCmd; + +typedef struct QEMU_PACKED NvmeCsdDeleteGroupCmd { + uint8_t opcode; + uint8_t 
flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint32_t id; + uint32_t rsvd11[5]; +} NvmeCsdDeleteGroupCmd; + +typedef struct QEMU_PACKED NvmeCsdMrsMgmtCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint32_t rsvd[4]; + uint64_t prp1; + uint64_t prp2; + uint16_t sel:4; + uint16_t rsvd10:12; + uint16_t rsid; + uint8_t numr; + uint8_t rsvd11a; + uint16_t rsvd11b; + uint32_t rsvd12[4]; +} NvmeCsdMrsMgmtCmd; + +#endif diff --git a/hw/femu/femu.c b/hw/femu/femu.c index f0cd531ea11..248a2e694e7 100644 --- a/hw/femu/femu.c +++ b/hw/femu/femu.c @@ -802,6 +802,8 @@ static int nvme_register_extensions(FemuCtrl *n) nvme_register_bbssd(n); } else if (ZNSSD(n)) { nvme_register_znssd(n); + } else if (CSD(n)) { + nvme_register_csd(n); } else { /* TODO: For future extensions */ } @@ -962,6 +964,14 @@ static const Property femu_props[] = { DEFINE_PROP_UINT8("lnum_lun", FemuCtrl, oc_params.num_lun, 8), DEFINE_PROP_UINT8("lnum_pln", FemuCtrl, oc_params.num_pln, 2), DEFINE_PROP_UINT16("lmetasize", FemuCtrl, oc_params.sos, 16), + DEFINE_PROP_UINT64("fdm_size", FemuCtrl, csd_params.fdm_size_mb, 0), + DEFINE_PROP_UINT8("nr_cu", FemuCtrl, csd_params.nr_cu, 4), + DEFINE_PROP_UINT8("nr_thread", FemuCtrl, csd_params.nr_thread, 4), + DEFINE_PROP_UINT64("time_slice", FemuCtrl, csd_params.time_slice, 200000), + DEFINE_PROP_UINT64("context_switch_time", FemuCtrl, + csd_params.context_switch_time, 200), + DEFINE_PROP_UINT16("csf_runtime_scale", FemuCtrl, + csd_params.csf_runtime_scale, 3), DEFINE_PROP_UINT8("zns_num_ch", FemuCtrl, zns_params.zns_num_ch, 2), DEFINE_PROP_UINT8("zns_num_lun", FemuCtrl, zns_params.zns_num_lun, 4), DEFINE_PROP_UINT8("zns_num_plane", FemuCtrl, zns_params.zns_num_plane, 2), diff --git a/hw/femu/meson.build b/hw/femu/meson.build index 984e48d53f1..4a3af5f8913 100644 --- a/hw/femu/meson.build +++ b/hw/femu/meson.build @@ -1 +1,10 @@ -system_ss.add(when: 'CONFIG_FEMU_PCI', if_true: files('dma.c', 
'intr.c', 'nvme-util.c', 'nvme-admin.c', 'nvme-io.c', 'femu.c', 'nossd/nop.c', 'nand/nand.c', 'timing-model/timing.c', 'ocssd/oc12.c', 'ocssd/oc20.c', 'zns/zns.c', 'zns/zftl.c','bbssd/bb.c', 'bbssd/ftl.c', 'lib/pqueue.c', 'lib/rte_ring.c', 'backend/dram.c')) +system_ss.add(when: 'CONFIG_FEMU_PCI', + if_true: files('dma.c', 'intr.c', 'nvme-util.c', + 'nvme-admin.c', 'nvme-io.c', 'femu.c', + 'nossd/nop.c', 'nand/nand.c', + 'timing-model/timing.c', 'ocssd/oc12.c', + 'ocssd/oc20.c', 'zns/zns.c', 'zns/zftl.c', + 'bbssd/bb.c', 'bbssd/ftl.c', 'csd/csd.c', + 'lib/pqueue.c', 'lib/rte_ring.c', + 'backend/dram.c')) +system_ss.add(when: 'CONFIG_FEMU_CSD_UBPF', if_true: femu_csd_ubpf) diff --git a/hw/femu/nvme-admin.c b/hw/femu/nvme-admin.c index 769c7fcf9d4..5ca5ba0e4a0 100644 --- a/hw/femu/nvme-admin.c +++ b/hw/femu/nvme-admin.c @@ -1407,6 +1407,9 @@ static uint16_t nvme_admin_cmd(FemuCtrl *n, NvmeCmd *cmd, NvmeCqe *cqe) case NVME_ADM_CMD_SECURITY_RECV: return NVME_INVALID_OPCODE | NVME_DNR; default: + if (n->ext_ops.admin_cmd_cqe) { + return n->ext_ops.admin_cmd_cqe(n, cmd, cqe); + } if (n->ext_ops.admin_cmd) { return n->ext_ops.admin_cmd(n, cmd); } @@ -1454,4 +1457,3 @@ void nvme_process_sq_admin(void *opaque) nvme_isr_notify_admin(cq); } } - diff --git a/hw/femu/nvme-io.c b/hw/femu/nvme-io.c index c1cabb09dbb..5d0317c16c1 100644 --- a/hw/femu/nvme-io.c +++ b/hw/femu/nvme-io.c @@ -145,7 +145,7 @@ static void nvme_process_cq_cpl(void *arg, int index_poller) int rc; int i; - if (BBSSD(n) || ZNSSD(n)) { + if (BBSSD(n) || ZNSSD(n) || CSD(n)) { rp = n->to_poller[index_poller]; } diff --git a/hw/femu/nvme.h b/hw/femu/nvme.h index 901cf7e5018..87603a54a12 100644 --- a/hw/femu/nvme.h +++ b/hw/femu/nvme.h @@ -1531,6 +1531,15 @@ typedef struct ZNSCtrlParams { int zns_flash_type; } ZNSCtrlParams; +typedef struct CsdCtrlParams { + uint64_t fdm_size_mb; + uint8_t nr_cu; + uint8_t nr_thread; + uint64_t time_slice; + uint64_t context_switch_time; + uint16_t csf_runtime_scale; +} 
CsdCtrlParams; + typedef struct OcCtrlParams { uint16_t sec_size; uint8_t secs_per_pg; @@ -1550,6 +1559,7 @@ typedef struct FemuExtCtrlOps { uint16_t (*rw_check_req)(struct FemuCtrl *, NvmeCmd *, NvmeRequest *); int (*start_ctrl)(struct FemuCtrl *); uint16_t (*admin_cmd)(struct FemuCtrl *, NvmeCmd *); + uint16_t (*admin_cmd_cqe)(struct FemuCtrl *, NvmeCmd *, NvmeCqe *); uint16_t (*io_cmd)(struct FemuCtrl *, NvmeNamespace *, NvmeCmd *, NvmeRequest *); uint16_t (*get_log)(struct FemuCtrl *, NvmeCmd *); } FemuExtCtrlOps; @@ -1680,6 +1690,7 @@ typedef struct FemuCtrl { uint8_t lver; /* Coperd: OCSSD version, 0x1 -> OC1.2, 0x2 -> OC2.0 */ uint32_t memsz; OcCtrlParams oc_params; + CsdCtrlParams csd_params; Oc12Ctrl *oc12_ctrl; volatile int64_t chip_next_avail_time[FEMU_MAX_NUM_CHIPS]; @@ -1745,6 +1756,7 @@ enum { FEMU_BBSSD_MODE = 1, FEMU_NOSSD_MODE = 2, FEMU_ZNSSD_MODE = 3, + FEMU_CSD_MODE = 4, FEMU_SMARTSSD_MODE, FEMU_KVSSD_MODE, }; @@ -1779,6 +1791,11 @@ static inline bool ZNSSD(FemuCtrl *n) return (n->femu_mode == FEMU_ZNSSD_MODE); } +static inline bool CSD(FemuCtrl *n) +{ + return (n->femu_mode == FEMU_CSD_MODE); +} + /* Basic NVMe Queue Pair operation APIs from nvme-util.c */ int nvme_check_sqid(FemuCtrl *n, uint16_t sqid); int nvme_check_cqid(FemuCtrl *n, uint16_t cqid); @@ -1847,6 +1864,7 @@ int nvme_register_ocssd20(FemuCtrl *n); int nvme_register_nossd(FemuCtrl *n); int nvme_register_bbssd(FemuCtrl *n); int nvme_register_znssd(FemuCtrl *n); +int nvme_register_csd(FemuCtrl *n); static inline uint64_t ns_blks(NvmeNamespace *ns, uint8_t lba_idx) { diff --git a/meson.build b/meson.build index 50c774a1955..356e142e502 100644 --- a/meson.build +++ b/meson.build @@ -2292,6 +2292,21 @@ endif # libbpf bpf_version = '1.1.0' libbpf = dependency('libbpf', version: '>=' + bpf_version, required: get_option('bpf'), method: 'pkg-config') +femu_csd_ubpf_path = get_option('femu_csd_ubpf_path') +if femu_csd_ubpf_path != '' + femu_csd_ubpf_lib_path = femu_csd_ubpf_path / 
'build/lib/libubpf.a' + femu_csd_ubpf_lib = cc.find_library('ubpf', + dirs: femu_csd_ubpf_path / 'build/lib', + required: get_option('femu_csd_ubpf')) + femu_csd_ubpf = declare_dependency( + include_directories: include_directories(femu_csd_ubpf_path / 'vm/inc', + femu_csd_ubpf_path / 'build/vm'), + dependencies: femu_csd_ubpf_lib) + emulator_link_args += femu_csd_ubpf_lib_path +else + femu_csd_ubpf = dependency('ubpf', required: get_option('femu_csd_ubpf'), + method: 'pkg-config') +endif if libbpf.found() and not cc.links(''' #include #include @@ -2529,6 +2544,7 @@ config_host_data.set('CONFIG_HEXAGON_IDEF_PARSER', get_option('hexagon_idef_pars config_host_data.set('CONFIG_LIBATTR', have_old_libattr) config_host_data.set('CONFIG_LIBCAP_NG', libcap_ng.found()) config_host_data.set('CONFIG_EBPF', libbpf.found()) +config_host_data.set('CONFIG_FEMU_CSD_UBPF', femu_csd_ubpf.found()) config_host_data.set('CONFIG_AF_XDP', libxdp.found()) config_host_data.set('CONFIG_LIBDAXCTL', libdaxctl.found()) config_host_data.set('CONFIG_LIBISCSI', libiscsi.found()) diff --git a/meson_options.txt b/meson_options.txt index fff1521e580..f728738e86a 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -146,6 +146,10 @@ option('blkio', type : 'feature', value : 'auto', description: 'libblkio block device driver') option('bpf', type : 'feature', value : 'auto', description: 'eBPF support') +option('femu_csd_ubpf', type : 'feature', value : 'disabled', + description: 'uBPF runtime support for FEMU CSD') +option('femu_csd_ubpf_path', type : 'string', value : '', + description: 'Path to an external ubpf-cemu build for FEMU CSD') option('cocoa', type : 'feature', value : 'auto', description: 'Cocoa user interface (macOS only)') option('curl', type : 'feature', value : 'auto', diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 0ebe6bc52a6..f24746909a4 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -116,6 +116,7 @@ 
meson_options_help() { printf "%s\n" ' dmg dmg image format support' printf "%s\n" ' docs Documentations build support' printf "%s\n" ' dsound DirectSound sound support' + printf "%s\n" ' femu-csd-ubpf uBPF runtime support for FEMU CSD' printf "%s\n" ' fuse FUSE block device export' printf "%s\n" ' fuse-lseek SEEK_HOLE/SEEK_DATA support for FUSE exports' printf "%s\n" ' gcrypt libgcrypt cryptography support' @@ -315,6 +316,8 @@ _meson_option_parse() { --enable-fdt) printf "%s" -Dfdt=enabled ;; --disable-fdt) printf "%s" -Dfdt=disabled ;; --enable-fdt=*) quote_sh "-Dfdt=$2" ;; + --enable-femu-csd-ubpf) printf "%s" -Dfemu_csd_ubpf=enabled ;; + --disable-femu-csd-ubpf) printf "%s" -Dfemu_csd_ubpf=disabled ;; --enable-fuse) printf "%s" -Dfuse=enabled ;; --disable-fuse) printf "%s" -Dfuse=disabled ;; --enable-fuse-lseek) printf "%s" -Dfuse_lseek=enabled ;; diff --git a/tests/femu-csd/Makefile b/tests/femu-csd/Makefile new file mode 100644 index 00000000000..105d4c6a353 --- /dev/null +++ b/tests/femu-csd/Makefile @@ -0,0 +1,29 @@ +CC ?= gcc +CXX ?= c++ +CFLAGS ?= -Wall -Wextra -O2 -g +SO_CFLAGS ?= -Wall -Wextra -O2 -g -fPIC +CXX_SO_CFLAGS ?= -Wall -Wextra -O2 -g -fPIC + +BPF_TARGETS := csd-vadd.bpf.o +TARGETS := csd-passthru csd-vadd.so csd-original-kernels.so + +.PHONY: all bpf clean + +all: $(TARGETS) + +bpf: $(BPF_TARGETS) + +csd-passthru: csd-passthru.c + $(CC) $(CFLAGS) -o $@ $< + +csd-vadd.so: csd-vadd.c femu-csd-kernel.h + $(CC) $(SO_CFLAGS) -shared -o $@ $< + +csd-original-kernels.so: csd-original-kernels.cc femu-csd-kernel.h + $(CXX) $(CXX_SO_CFLAGS) -shared -o $@ $< -pthread -llz4 + +csd-vadd.bpf.o: csd-vadd.bpf.c femu-csd-kernel.h + clang -target bpf -O2 -g -c -o $@ $< + +clean: + rm -f $(TARGETS) $(BPF_TARGETS) diff --git a/tests/femu-csd/README.md b/tests/femu-csd/README.md new file mode 100644 index 00000000000..c075d7832e7 --- /dev/null +++ b/tests/femu-csd/README.md @@ -0,0 +1,156 @@ +# FEMU CSD Passthrough Tests + +This directory contains lightweight 
guest-side tools for validating FEMU CSD +vendor commands without `linux-cemu`, FDMFS, or a fixed VM image. + +Build inside a normal Linux guest: + +```bash +make +``` + +Run a basic AFDM smoke test against a namespace device: + +```bash +sudo ./csd-passthru /dev/nvme0n1 smoke +``` + +The smoke test sends AFDM commands through `NVME_IOCTL_IO_CMD` and uses the +original CEMU-style admin lifecycle commands through `NVME_IOCTL_ADMIN_CMD`: + +- allocate AFDM +- write AFDM +- read AFDM +- load and activate a phantom CSF +- execute the phantom CSF +- deactivate and unload the phantom CSF +- deallocate AFDM + +The build also produces `csd-vadd.so`, a minimal shared-library CSF used by the +shared-library smoke path. The program load payload follows the original CEMU +descriptor format: a PRP data buffer containing `path\0symbol\0`. Because the +shared library is loaded by the QEMU process on the host, the `path` string +inside that descriptor must be visible to the host QEMU process: + +```bash +sudo ./csd-passthru /dev/nvme0n1 smoke-so /home/<user>/FEMU/tests/femu-csd/csd-vadd.so +``` + +`make` also builds `csd-original-kernels.so`, which contains small +shared-library ports of the original CEMU `knn`, `sql`, `grep`, and `lz4` +kernels. These tests exercise the same CSD program lifecycle and inline memory +range interface as the vadd test: + +```bash +sudo ./csd-passthru /dev/nvme0n1 smoke-so-all /home/<user>/FEMU/tests/femu-csd/csd-original-kernels.so +``` + +FDMFS-free MRS is available through the original CEMU memory range set +management command layout (`0x21`). 
The passthrough helper creates an MRS from +AFDM-backed memory range descriptors and executes a CSF by `rsid`: + +```bash +sudo ./csd-passthru /dev/nvme0n1 smoke-mrs /home/<user>/FEMU/tests/femu-csd/csd-vadd.so +sudo ./csd-passthru /dev/nvme0n1 vadd-example /home/<user>/FEMU/tests/femu-csd/csd-vadd.so +``` + +The migrated sync-breakdown check measures NVM-to-AFDM copy, CSF execution, and +AFDM read as separate stages: + +```bash +sudo ./csd-passthru /dev/nvme0n1 sync-breakdown /home/<user>/FEMU/tests/femu-csd/csd-vadd.so 4096 16 +``` + +The indirect vadd smoke keeps the original indirect CSF ABI shape and uses an +AFDM-backed MRS instead of FDMFS files: + +```bash +sudo ./csd-passthru /dev/nvme0n1 indirect-vadd /home/<user>/FEMU/tests/femu-csd/csd-vadd.so +``` + +A compact benchmark entry covers vadd plus the original kernel smoke set: + +```bash +sudo ./csd-passthru /dev/nvme0n1 benchmark-kernels /home/<user>/FEMU/tests/femu-csd/csd-vadd.so /home/<user>/FEMU/tests/femu-csd/csd-original-kernels.so 1 +``` + +The shared-library CSF ABI is: + +```c +int64_t kernel(struct femu_csd_args *args); +``` + +The execute command uses a CEMU-style program execute command body: +`pind`, `numr`, `dlen`, `cparam1`, `cparam2`, `group`, and `runtime` are sent +in the command. Because this lightweight test path intentionally avoids MRS and +FDMFS, it sends inline memory ranges in the PRP data buffer. In those test +ranges, `nsid=0` means AFDM, `sb` is the AFDM id, and `len=0` means the full +AFDM allocation. The CSF ABI then sees `args->mr_addr[0]` as the output AFDM +and `args->mr_addr[1]` as the input AFDM. 
+ +Other useful command-level checks: + +```bash +sudo ./csd-passthru /dev/nvme0n1 alloc 4096 +sudo ./csd-passthru /dev/nvme0n1 create-group 5 0 0 +sudo ./csd-passthru /dev/nvme0n1 set-qos 6 0 0 +sudo ./csd-passthru /dev/nvme0n1 exec 0 +sudo ./csd-passthru /dev/nvme0n1 delete-group +sudo ./csd-passthru /dev/nvme0n1 nvm-to-afdm 0 0 0 +sudo ./csd-passthru /dev/nvme0n1 bench 4096 32 +sudo ./csd-passthru /dev/nvme0n1 bench 65536 16 +``` + +The `bench` command reports wall-clock average latency for AFDM write, AFDM +read, and NVM-to-AFDM copy. It is intended as a regression check for the CSD +command path, not a final paper-level benchmark harness. + +FEMU CSD also accepts the original CEMU program lifecycle admin command +layouts for load/unload (`0x22`) and activate/deactivate (`0x23`). The +lightweight passthrough helper sends those commands to the controller device +without the CEMU kernel driver: + +```bash +sudo ./csd-passthru /dev/nvme0 admin-load-phantom 1 1000 +sudo ./csd-passthru /dev/nvme0 admin-load-so 1 /host/path/csd-vadd.so csd_vadd +sudo ./csd-passthru /dev/nvme0 admin-load-ubpf 1 /host/path/csf.bpf.o csf_symbol 0 +sudo ./csd-passthru /dev/nvme0 admin-activate 1 +sudo ./csd-passthru /dev/nvme0 admin-deactivate 1 +sudo ./csd-passthru /dev/nvme0 admin-unload 1 +sudo ./csd-passthru /dev/nvme0 admin-create-mrs +sudo ./csd-passthru /dev/nvme0 admin-delete-mrs +``` + +The tool assumes FEMU was started with CSD mode enabled, for example: + +```bash +-device femu,femu_mode=4,fdm_size=64 +``` + +It intentionally does not depend on CEMU's modified kernel driver or FDMFS. CSD +mode still uses FEMU's device-side BBSSD FTL path for normal NVM read/write +requests; the passthrough commands validate the additional computational +storage interface. + +Shared-library CSF support is enabled in the default FEMU build. uBPF support +is optional because it depends on an external `ubpf` library. 
If `ubpf` is +installed through pkg-config, build FEMU with: + +```bash +./femu-compile.sh --enable-csd-ubpf +``` + +If you use the `ubpf-cemu` source tree directly, pass its path explicitly: + +```bash +./femu-compile.sh --enable-csd-ubpf=/home//CEMU-FEMU/ubpf-cemu +``` + +The guest helper does not build BPF objects by default. Build the BPF test +program on the host or in a guest with Clang BPF support: + +```bash +make bpf +sudo ./csd-passthru /dev/nvme0n1 smoke-ubpf /host/path/csd-vadd.bpf.o 0 +sudo ./csd-passthru /dev/nvme0n1 smoke-ubpf /host/path/csd-vadd.bpf.o 1 +``` diff --git a/tests/femu-csd/csd-original-kernels.cc b/tests/femu-csd/csd-original-kernels.cc new file mode 100644 index 00000000000..fca043565e4 --- /dev/null +++ b/tests/femu-csd/csd-original-kernels.cc @@ -0,0 +1,148 @@ +#include +#include +#include +#include + +#include + +#include "femu-csd-kernel.h" + +struct KnnNode { + char tag[64]; + char vector[4096]; +}; + +static int knn_distance(const int *query, const char *vector) +{ + int distance = 0; + + for (size_t i = 0; i < 4096; ++i) { + int diff = query[i] - (vector[i] - '0'); + + distance += diff * diff; + } + + return distance; +} + +static void knn_chunk(const KnnNode *nodes, const int *query, + size_t start, size_t end, int *distances) +{ + for (size_t i = start; i < end; ++i) { + distances[i] = knn_distance(query, nodes[i].vector); + } +} + +extern "C" long long csd_knn(struct femu_csd_args *args) +{ + if (args->numr < 2) { + return -1; + } + + const KnnNode *nodes = static_cast(args->mr_addr[0]); + int *output = static_cast(args->mr_addr[1]); + size_t nr_vector = args->mr_len[0] / static_cast(sizeof(KnnNode)); + int query[4096] = { 0 }; + size_t nr_threads = std::min(2, std::max(1, nr_vector)); + size_t chunk = (nr_vector + nr_threads - 1) / nr_threads; + std::vector threads; + + for (size_t t = 0; t < nr_threads; ++t) { + size_t start = t * chunk; + size_t end = std::min(start + chunk, nr_vector); + + if (start < end) { + 
threads.emplace_back(knn_chunk, nodes, query, start, end, output); + } + } + + for (auto &thread : threads) { + thread.join(); + } + + return nr_vector; +} + +static long long sql_query_records(const char *data, size_t start, size_t end, + int year_lower, int year_upper, char *output) +{ + static constexpr int record_length = 32; + long long output_size = 0; + + for (size_t i = start; i + record_length <= end; i += record_length) { + const char *record = data + i; + int year = ((record[30] - '0') << 8) | static_cast(record[31] - '0'); + + if (year >= year_lower && year <= year_upper) { + memcpy(output + output_size, record, record_length); + output_size += record_length; + } + } + + return output_size; +} + +extern "C" long long csd_sql(struct femu_csd_args *args) +{ + if (args->numr < 2) { + return -1; + } + + const char *data = static_cast(args->mr_addr[0]); + char *output = static_cast(args->mr_addr[1]); + int year_lower = args->cparam1 ? args->cparam1 : 50; + int year_upper = args->cparam2 ? args->cparam2 : 60; + + return sql_query_records(data, 0, args->mr_len[0], year_lower, year_upper, output); +} + +static long long grep_rows(const char *data, int rows, int cols, const char *pattern) +{ + int pattern_length = strlen(pattern); + long long matches = 0; + + for (int r = 0; r < rows; ++r) { + const char *line = data + r * cols; + + for (int c = 0; c <= cols - pattern_length; ++c) { + if (!strncmp(line + c, pattern, pattern_length)) { + matches++; + } + } + } + + return matches * 8; +} + +extern "C" long long csd_grep(struct femu_csd_args *args) +{ + if (args->numr < 2) { + return -1; + } + + const char *data = static_cast(args->mr_addr[0]); + const char *pattern = static_cast(args->mr_addr[1]); + int cols = args->cparam2 ? args->cparam2 : 1024; + int rows = args->cparam1 ? 
args->cparam1 : args->mr_len[0] / cols; + + return grep_rows(data, rows, cols, pattern); +} + +extern "C" long long csd_lz4(struct femu_csd_args *args) +{ + if (args->numr < 2) { + return -1; + } + + const char *input = static_cast(args->mr_addr[0]); + char *output = static_cast(args->mr_addr[1]); + long long input_size = args->mr_len[0]; + long long output_size = args->mr_len[1]; + int max_compressed_size = LZ4_compressBound(input_size); + + if (max_compressed_size <= 0 || output_size < max_compressed_size) { + return -1; + } + + return LZ4_compress_default(input, output, input_size, output_size); +} diff --git a/tests/femu-csd/csd-passthru.c b/tests/femu-csd/csd-passthru.c new file mode 100644 index 00000000000..6653f0610cc --- /dev/null +++ b/tests/femu-csd/csd-passthru.c @@ -0,0 +1,1513 @@ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum { + CSD_ADM_MRS_MGMT = 0x21, + CSD_ADM_COMPUTE_LOAD = 0x22, + CSD_ADM_COMPUTE_LOAD_DATA = 0x25, + CSD_ADM_COMPUTE_ACTIVATE = 0x23, + CSD_CMD_ALLOC_FDM = 0xb0, + CSD_CMD_DEALLOC_AFDM = 0xc0, + CSD_CMD_NVM_TO_AFDM = 0xd0, + CSD_CMD_EXEC = 0xe1, + CSD_CMD_READ_AFDM = 0xf2, + CSD_CMD_WRITE_AFDM = 0xf5, + CSD_CMD_CREATE_GROUP = 0xf6, + CSD_CMD_SET_QOS = 0xf7, + CSD_CMD_DELETE_GROUP = 0xf8, +}; + +enum { + CSD_CSF_TYPE_PHANTOM = 0, + CSD_CSF_TYPE_EBPF = 1, + CSD_CSF_TYPE_SHARED_LIB = 3, + CSD_LOAD_FLAG_JIT = 1U << 0, + CSD_LOAD_FLAG_INDIRECT = 1U << 1, +}; + +enum { + CSD_MR_AFDM_NSID = 0, +}; + +struct csd_memory_range { + uint32_t nsid; + uint32_t len; + uint64_t sb; + uint64_t rsvd[2]; +} __attribute__((packed)); + +struct csd_program_execute_cmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint16_t pind; + uint16_t rsid; + uint32_t numr; + uint32_t dlen; + uint32_t rsvd; + uint64_t prp1; + uint64_t prp2; + uint64_t cparam1; + uint64_t cparam2; + uint32_t group:8; + uint32_t chunk_nlb:24; + 
uint32_t runtime; +} __attribute__((packed)); + +struct csd_mrs_mgmt_cmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint32_t rsvd[4]; + uint64_t prp1; + uint64_t prp2; + uint16_t sel:4; + uint16_t rsvd10:12; + uint16_t rsid; + uint8_t numr; + uint8_t rsvd11a; + uint16_t rsvd11b; + uint32_t rsvd12[4]; +} __attribute__((packed)); + +static void usage(const char *prog) +{ + fprintf(stderr, + "Usage:\n" + " %s /dev/nvmeXnY smoke\n" + " %s /dev/nvmeXnY alloc \n" + " %s /dev/nvmeXnY dealloc \n" + " %s /dev/nvmeXnY exec [runtime-ns] [group-id] [cparam1] [cparam2]\n" + " %s /dev/nvmeXnY smoke-so \n" + " %s /dev/nvmeXnY smoke-so-all \n" + " %s /dev/nvmeXnY smoke-ubpf [jit:0|1]\n" + " %s /dev/nvmeXnY smoke-mrs \n" + " %s /dev/nvmeXnY vadd-example \n" + " %s /dev/nvmeXnY sync-breakdown \n" + " %s /dev/nvmeXnY indirect-vadd \n" + " %s /dev/nvmeXnY benchmark-kernels \n" + " %s /dev/nvmeXnY bench \n" + " %s /dev/nvmeX admin-load-so [runtime-ns]\n" + " %s /dev/nvmeX admin-load-ubpf [jit:0|1] [runtime-ns]\n" + " %s /dev/nvmeX admin-load-phantom \n" + " %s /dev/nvmeX admin-activate \n" + " %s /dev/nvmeX admin-deactivate \n" + " %s /dev/nvmeX admin-unload \n" + " %s /dev/nvmeXnY create-group \n" + " %s /dev/nvmeXnY set-qos \n" + " %s /dev/nvmeXnY delete-group \n" + " %s /dev/nvmeX admin-create-mrs \n" + " %s /dev/nvmeX admin-delete-mrs \n" + " %s /dev/nvmeXnY write \n" + " %s /dev/nvmeXnY read \n" + " %s /dev/nvmeXnY nvm-to-afdm \n", + prog, prog, prog, prog, prog, prog, prog, prog, prog, prog, prog, + prog, prog, prog, prog, prog, prog, + prog, prog, prog, prog, prog, prog, prog, prog, prog, prog); +} + +static uint64_t monotonic_ns(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec; +} + +static uint64_t parse_u64(const char *s, const char *name) +{ + char *end = NULL; + uint64_t v; + + errno = 0; + v = strtoull(s, &end, 0); + if (errno || !end || *end) { + fprintf(stderr, 
"invalid %s: %s\n", name, s); + exit(EXIT_FAILURE); + } + + return v; +} + +static int submit(int fd, struct nvme_passthru_cmd *cmd) +{ + int ret = ioctl(fd, NVME_IOCTL_IO_CMD, cmd); + + if (ret < 0) { + perror("NVME_IOCTL_IO_CMD"); + return -1; + } + if (ret > 0) { + fprintf(stderr, "NVME_IOCTL_IO_CMD status=0x%x result=0x%x\n", + ret, cmd->result); + return -1; + } + + return 0; +} + +static int submit_admin(int fd, struct nvme_passthru_cmd *cmd) +{ + int ret = ioctl(fd, NVME_IOCTL_ADMIN_CMD, cmd); + + if (ret < 0) { + perror("NVME_IOCTL_ADMIN_CMD"); + return -1; + } + if (ret > 0) { + fprintf(stderr, "NVME_IOCTL_ADMIN_CMD status=0x%x result=0x%x\n", + ret, cmd->result); + return -1; + } + + return 0; +} + +static int open_admin_from_namespace(const char *dev) +{ + char ctrl[PATH_MAX]; + char *base; + char *name; + char *ns; + int fd; + + if (strlen(dev) >= sizeof(ctrl)) { + fprintf(stderr, "device path too long: %s\n", dev); + exit(EXIT_FAILURE); + } + + strcpy(ctrl, dev); + base = strrchr(ctrl, '/'); + name = base ? base + 1 : ctrl; + ns = strstr(name, "nvme"); + if (ns) { + ns = strchr(ns + strlen("nvme"), 'n'); + if (ns) { + *ns = '\0'; + } + } + + fd = open(ctrl, O_RDWR); + if (fd < 0) { + perror(ctrl); + exit(EXIT_FAILURE); + } + + return fd; +} + +static void csd_admin_load_program(int fd, uint16_t pind, uint8_t type, + const char *path, const char *symbol, + uint8_t flags, uint32_t runtime) +{ + size_t path_len = path ? strlen(path) : 0; + size_t symbol_len = symbol ? strlen(symbol) : 0; + size_t size = path_len + symbol_len + (path ? 2 : 0); + void *buf = NULL; + uint32_t cdw10 = pind | ((uint32_t)type << 16); + struct nvme_passthru_cmd cmd = { + .opcode = size ? CSD_ADM_COMPUTE_LOAD_DATA : CSD_ADM_COMPUTE_LOAD, + .nsid = 1, + .data_len = size, + .cdw2 = ((uint32_t)flags & 0x1), + .cdw3 = runtime, + .cdw10 = cdw10 | ((flags & CSD_LOAD_FLAG_INDIRECT) ? 
(1U << 28) : 0), + .cdw11 = (uint32_t)size, + .cdw14 = (uint32_t)size, + }; + + if (size) { + if (posix_memalign(&buf, 4096, (size + 4095) & ~4095ULL)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + memset(buf, 0, (size + 4095) & ~4095ULL); + memcpy(buf, path, path_len); + memcpy((char *)buf + path_len + 1, symbol, symbol_len); + cmd.addr = (uintptr_t)buf; + } + + if (submit_admin(fd, &cmd)) { + free(buf); + exit(EXIT_FAILURE); + } + + free(buf); +} + +static void csd_admin_unload_program(int fd, uint16_t pind) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_ADM_COMPUTE_LOAD, + .nsid = 1, + .cdw10 = pind | (1U << 24), + }; + + if (submit_admin(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static void csd_admin_activation(int fd, uint16_t pind, uint8_t sel) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_ADM_COMPUTE_ACTIVATE, + .nsid = 1, + .cdw10 = pind | ((uint32_t)sel << 16), + }; + + if (submit_admin(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static uint16_t csd_admin_create_mrs(int fd, const struct csd_memory_range *ranges, + uint8_t numr) +{ + struct nvme_passthru_cmd cmd = { 0 }; + struct csd_mrs_mgmt_cmd *mrs = (struct csd_mrs_mgmt_cmd *)&cmd; + + mrs->opcode = CSD_ADM_MRS_MGMT; + mrs->nsid = 1; + mrs->sel = 0; + mrs->numr = numr; + cmd.addr = (uintptr_t)ranges; + cmd.data_len = numr * sizeof(*ranges); + + if (submit_admin(fd, &cmd)) { + exit(EXIT_FAILURE); + } + + return (uint16_t)cmd.result; +} + +static void csd_admin_delete_mrs(int fd, uint16_t rsid) +{ + struct nvme_passthru_cmd cmd = { 0 }; + struct csd_mrs_mgmt_cmd *mrs = (struct csd_mrs_mgmt_cmd *)&cmd; + + mrs->opcode = CSD_ADM_MRS_MGMT; + mrs->nsid = 1; + mrs->sel = 1; + mrs->rsid = rsid; + + if (submit_admin(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static uint32_t csd_exec_ranges(int fd, uint32_t pind, uint32_t mr0_afdm_id, + uint32_t mr1_afdm_id, uint32_t runtime, + uint32_t group_id, uint64_t cparam1, + uint64_t cparam2) +{ + struct nvme_passthru_cmd cmd = { 0 }; + 
struct csd_program_execute_cmd *exec = + (struct csd_program_execute_cmd *)&cmd; + struct csd_memory_range ranges[2] = { + { + .nsid = CSD_MR_AFDM_NSID, + .len = 0, + .sb = mr0_afdm_id, + }, + { + .nsid = CSD_MR_AFDM_NSID, + .len = 0, + .sb = mr1_afdm_id, + }, + }; + + exec->opcode = CSD_CMD_EXEC; + exec->nsid = 1; + exec->pind = pind; + exec->numr = 2; + exec->cparam1 = cparam1; + exec->cparam2 = cparam2; + exec->group = group_id; + exec->runtime = runtime; + + cmd.addr = (uintptr_t)ranges; + cmd.data_len = sizeof(ranges); + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } + + return cmd.result; +} + +static uint32_t csd_exec_mrs(int fd, uint32_t pind, uint16_t rsid, + uint32_t runtime, uint32_t group_id, + uint64_t cparam1, uint64_t cparam2) +{ + struct nvme_passthru_cmd cmd = { 0 }; + struct csd_program_execute_cmd *exec = + (struct csd_program_execute_cmd *)&cmd; + + exec->opcode = CSD_CMD_EXEC; + exec->nsid = 1; + exec->pind = pind; + exec->rsid = rsid; + exec->cparam1 = cparam1; + exec->cparam2 = cparam2; + exec->group = group_id; + exec->runtime = runtime; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } + + return cmd.result; +} + +static uint32_t csd_exec(int fd, uint32_t pind, uint32_t in_afdm_id, + uint32_t out_afdm_id, uint32_t runtime, + uint32_t group_id, uint64_t cparam1, + uint64_t cparam2) +{ + return csd_exec_ranges(fd, pind, out_afdm_id, in_afdm_id, runtime, + group_id, cparam1, cparam2); +} + +static uint32_t csd_create_group(int fd, int8_t prio, uint32_t bandwidth, + uint32_t deadline) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_CREATE_GROUP, + .nsid = 1, + .cdw10 = (uint8_t)prio, + .cdw11 = bandwidth, + .cdw12 = deadline, + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } + + return cmd.result; +} + +static void csd_set_qos(int fd, uint32_t group_id, int8_t prio, + uint32_t bandwidth, uint32_t deadline) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_SET_QOS, + .nsid = 1, + .cdw10 = (uint8_t)prio, + 
.cdw11 = bandwidth, + .cdw12 = deadline, + .cdw13 = group_id, + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static void csd_delete_group(int fd, uint32_t group_id) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_DELETE_GROUP, + .nsid = 1, + .cdw10 = group_id, + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static uint32_t csd_alloc(int fd, uint64_t size) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_ALLOC_FDM, + .nsid = 1, + .cdw10 = (uint32_t)size, + .cdw11 = (uint32_t)(size >> 32), + .cdw12 = 0, + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } + + return cmd.result; +} + +static void csd_dealloc(int fd, uint32_t id) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_DEALLOC_AFDM, + .nsid = 1, + .cdw10 = id, + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static void csd_write(int fd, uint32_t id, uint64_t offset, const void *buf, + uint32_t size) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_WRITE_AFDM, + .nsid = 1, + .addr = (uintptr_t)buf, + .data_len = size, + .cdw10 = (uint32_t)offset, + .cdw11 = (uint32_t)(offset >> 32), + .cdw12 = size, + .cdw13 = 0, + .cdw14 = id, + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static void csd_read(int fd, uint32_t id, uint64_t offset, void *buf, + uint32_t size) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_READ_AFDM, + .nsid = 1, + .addr = (uintptr_t)buf, + .data_len = size, + .cdw10 = (uint32_t)offset, + .cdw11 = (uint32_t)(offset >> 32), + .cdw12 = size, + .cdw13 = 0, + .cdw14 = id, + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static void csd_nvm_to_afdm(int fd, uint32_t id, uint64_t offset, + uint64_t slba, uint16_t nlb) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_NVM_TO_AFDM, + .nsid = 1, + .cdw10 = (uint32_t)slba, + .cdw11 = (uint32_t)(slba >> 32), + .cdw12 = nlb, + .cdw13 = id, + .cdw14 = (uint32_t)offset, + .cdw15 = (uint32_t)(offset >> 32), + }; + + 
if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static void dump_hex(const uint8_t *buf, size_t size) +{ + for (size_t i = 0; i < size; i++) { + printf("%02x%s", buf[i], (i + 1) % 16 == 0 ? "\n" : " "); + } + if (size % 16) { + printf("\n"); + } +} + +static void run_smoke(const char *dev, int fd) +{ + const char *msg = "femu-csd-afdm-smoke"; + size_t msg_len = strlen(msg) + 1; + uint8_t *write_buf = NULL; + uint8_t *read_buf = NULL; + uint32_t id; + uint16_t csf_id = 1; + int admin_fd; + + if (posix_memalign((void **)&write_buf, 4096, 4096) || + posix_memalign((void **)&read_buf, 4096, 4096)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + + memset(write_buf, 0, 4096); + memset(read_buf, 0, 4096); + memcpy(write_buf, msg, msg_len); + + id = csd_alloc(fd, 4096); + printf("allocated AFDM id=%" PRIu32 "\n", id); + + csd_write(fd, id, 0, write_buf, 4096); + csd_read(fd, id, 0, read_buf, 4096); + + if (memcmp(write_buf, read_buf, 4096)) { + fprintf(stderr, "AFDM smoke mismatch\n"); + exit(EXIT_FAILURE); + } + + admin_fd = open_admin_from_namespace(dev); + csd_admin_load_program(admin_fd, csf_id, CSD_CSF_TYPE_PHANTOM, + NULL, NULL, 0, 1000); + csd_admin_activation(admin_fd, csf_id, 1); + printf("loaded phantom CSF id=%" PRIu16 "\n", csf_id); + csd_exec(fd, csf_id, id, id, 0, 0, 0, 0); + csd_admin_activation(admin_fd, csf_id, 0); + csd_admin_unload_program(admin_fd, csf_id); + close(admin_fd); + printf("phantom exec passed\n"); + + csd_dealloc(fd, id); + printf("AFDM smoke passed\n"); + + free(write_buf); + free(read_buf); +} + +static void run_so_smoke(const char *dev, int fd, const char *so_path) +{ + enum { COUNT = 1024 }; + int *input = NULL; + int *output = NULL; + uint32_t in_id; + uint32_t out_id; + uint16_t csf_id = 1; + int admin_fd; + + if (posix_memalign((void **)&input, 4096, 8192) || + posix_memalign((void **)&output, 4096, 4096)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < COUNT; i++) { + input[i * 2] = 
i; + input[i * 2 + 1] = i * 2; + output[i] = 0; + } + + in_id = csd_alloc(fd, 8192); + out_id = csd_alloc(fd, 4096); + csd_write(fd, in_id, 0, input, 8192); + csd_write(fd, out_id, 0, output, 4096); + + admin_fd = open_admin_from_namespace(dev); + csd_admin_load_program(admin_fd, csf_id, CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_vadd", 0, 0); + csd_admin_activation(admin_fd, csf_id, 1); + printf("loaded shared-library CSF id=%" PRIu16 "\n", csf_id); + csd_exec(fd, csf_id, in_id, out_id, 0, 0, COUNT, 0); + csd_read(fd, out_id, 0, output, 4096); + + for (int i = 0; i < COUNT; i++) { + int expected = i + i * 2; + + if (output[i] != expected) { + fprintf(stderr, "shared-library smoke mismatch at %d: got %d expected %d\n", + i, output[i], expected); + exit(EXIT_FAILURE); + } + } + + csd_dealloc(fd, in_id); + csd_dealloc(fd, out_id); + csd_admin_activation(admin_fd, csf_id, 0); + csd_admin_unload_program(admin_fd, csf_id); + close(admin_fd); + printf("shared-library smoke passed\n"); + + free(input); + free(output); +} + +static void run_ubpf_smoke(const char *dev, int fd, const char *elf_path, + uint8_t jit) +{ + enum { COUNT = 1024 }; + int *input = NULL; + int *output = NULL; + uint32_t in_id; + uint32_t out_id; + uint16_t csf_id = 5; + int admin_fd; + + if (posix_memalign((void **)&input, 4096, 8192) || + posix_memalign((void **)&output, 4096, 4096)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < COUNT; i++) { + input[i * 2] = i; + input[i * 2 + 1] = i * 3; + output[i] = 0; + } + + in_id = csd_alloc(fd, 8192); + out_id = csd_alloc(fd, 4096); + csd_write(fd, in_id, 0, input, 8192); + csd_write(fd, out_id, 0, output, 4096); + + admin_fd = open_admin_from_namespace(dev); + csd_admin_load_program(admin_fd, csf_id, CSD_CSF_TYPE_EBPF, + elf_path, "csd_vadd_bpf", jit, 0); + csd_admin_activation(admin_fd, csf_id, 1); + printf("loaded uBPF CSF id=%" PRIu16 " jit=%u\n", csf_id, jit); + csd_exec(fd, csf_id, in_id, out_id, 0, 0, COUNT, 0); + 
csd_read(fd, out_id, 0, output, 4096); + + for (int i = 0; i < COUNT; i++) { + int expected = i + i * 3; + + if (output[i] != expected) { + fprintf(stderr, "uBPF smoke mismatch at %d: got %d expected %d\n", + i, output[i], expected); + exit(EXIT_FAILURE); + } + } + + csd_dealloc(fd, in_id); + csd_dealloc(fd, out_id); + csd_admin_activation(admin_fd, csf_id, 0); + csd_admin_unload_program(admin_fd, csf_id); + close(admin_fd); + printf("uBPF smoke passed\n"); + + free(input); + free(output); +} + +static void run_mrs_smoke(const char *dev, int fd, const char *so_path) +{ + enum { COUNT = 1024 }; + int *input = NULL; + int *output = NULL; + uint32_t in_id; + uint32_t out_id; + uint16_t rsid; + uint16_t csf_id = 7; + int admin_fd; + struct csd_memory_range ranges[2]; + + if (posix_memalign((void **)&input, 4096, 8192) || + posix_memalign((void **)&output, 4096, 4096)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < COUNT; i++) { + input[i * 2] = i; + input[i * 2 + 1] = i * 4; + output[i] = 0; + } + + in_id = csd_alloc(fd, 8192); + out_id = csd_alloc(fd, 4096); + csd_write(fd, in_id, 0, input, 8192); + csd_write(fd, out_id, 0, output, 4096); + + memset(ranges, 0, sizeof(ranges)); + ranges[0].nsid = CSD_MR_AFDM_NSID; + ranges[0].len = 0; + ranges[0].sb = out_id; + ranges[1].nsid = CSD_MR_AFDM_NSID; + ranges[1].len = 0; + ranges[1].sb = in_id; + + admin_fd = open_admin_from_namespace(dev); + rsid = csd_admin_create_mrs(admin_fd, ranges, 2); + csd_admin_load_program(admin_fd, csf_id, CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_vadd", 0, 0); + csd_admin_activation(admin_fd, csf_id, 1); + printf("created MRS rsid=%" PRIu16 "\n", rsid); + csd_exec_mrs(fd, csf_id, rsid, 0, 0, COUNT, 0); + csd_read(fd, out_id, 0, output, 4096); + + for (int i = 0; i < COUNT; i++) { + int expected = i + i * 4; + + if (output[i] != expected) { + fprintf(stderr, "MRS smoke mismatch at %d: got %d expected %d\n", + i, output[i], expected); + exit(EXIT_FAILURE); + } + } + 
+ csd_admin_activation(admin_fd, csf_id, 0); + csd_admin_unload_program(admin_fd, csf_id); + csd_admin_delete_mrs(admin_fd, rsid); + close(admin_fd); + csd_dealloc(fd, in_id); + csd_dealloc(fd, out_id); + printf("MRS shared-library smoke passed\n"); + + free(input); + free(output); +} + +static void run_vadd_example(const char *dev, int fd, const char *so_path) +{ + enum { COUNT = 1024 }; + int *input = NULL; + int *output = NULL; + uint32_t in_id; + uint32_t out_id; + uint16_t rsid; + uint16_t csf_id = 8; + int admin_fd; + struct csd_memory_range ranges[2]; + + if (posix_memalign((void **)&input, 4096, 8192) || + posix_memalign((void **)&output, 4096, 4096)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < COUNT; i++) { + input[i * 2] = i * 2; + input[i * 2 + 1] = i * 2 + 1; + output[i] = 0; + } + + in_id = csd_alloc(fd, 8192); + out_id = csd_alloc(fd, 4096); + csd_write(fd, in_id, 0, input, 8192); + csd_write(fd, out_id, 0, output, 4096); + + memset(ranges, 0, sizeof(ranges)); + ranges[0].nsid = CSD_MR_AFDM_NSID; + ranges[0].sb = out_id; + ranges[1].nsid = CSD_MR_AFDM_NSID; + ranges[1].sb = in_id; + + admin_fd = open_admin_from_namespace(dev); + rsid = csd_admin_create_mrs(admin_fd, ranges, 2); + csd_admin_load_program(admin_fd, csf_id, CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_vadd", 0, 0); + csd_admin_activation(admin_fd, csf_id, 1); + csd_exec_mrs(fd, csf_id, rsid, 0, 0, COUNT, 0); + csd_read(fd, out_id, 0, output, 4096); + + for (int i = 0; i < COUNT; i++) { + int expected = input[i * 2] + input[i * 2 + 1]; + + if (output[i] != expected) { + fprintf(stderr, "vadd example mismatch at %d: got %d expected %d\n", + i, output[i], expected); + exit(EXIT_FAILURE); + } + } + + csd_admin_activation(admin_fd, csf_id, 0); + csd_admin_unload_program(admin_fd, csf_id); + csd_admin_delete_mrs(admin_fd, rsid); + close(admin_fd); + csd_dealloc(fd, in_id); + csd_dealloc(fd, out_id); + printf("vadd example passed\n"); + + free(input); + 
free(output); +} + +static void run_sync_breakdown(const char *dev, int fd, const char *so_path, + uint32_t bytes, uint32_t iterations) +{ + int *input = NULL; + int *output = NULL; + uint32_t in_id; + uint32_t out_id; + uint16_t rsid; + uint16_t csf_id = 9; + int admin_fd; + struct csd_memory_range ranges[2]; + uint64_t copy_time = 0; + uint64_t exec_time = 0; + uint64_t read_time = 0; + uint64_t start; + uint64_t end; + uint32_t count; + uint32_t in_bytes; + + if (bytes == 0 || iterations == 0 || bytes % sizeof(int)) { + fprintf(stderr, "sync-breakdown requires non-zero int-aligned bytes and iterations\n"); + exit(EXIT_FAILURE); + } + count = bytes / sizeof(int); + in_bytes = bytes * 2; + + if (posix_memalign((void **)&input, 4096, (in_bytes + 4095U) & ~4095U) || + posix_memalign((void **)&output, 4096, (bytes + 4095U) & ~4095U)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + + for (uint32_t i = 0; i < count; i++) { + input[i * 2] = i; + input[i * 2 + 1] = i + 1; + output[i] = 0; + } + + in_id = csd_alloc(fd, in_bytes); + out_id = csd_alloc(fd, bytes); + csd_write(fd, out_id, 0, output, bytes); + + memset(ranges, 0, sizeof(ranges)); + ranges[0].nsid = CSD_MR_AFDM_NSID; + ranges[0].sb = out_id; + ranges[1].nsid = CSD_MR_AFDM_NSID; + ranges[1].sb = in_id; + + admin_fd = open_admin_from_namespace(dev); + rsid = csd_admin_create_mrs(admin_fd, ranges, 2); + csd_admin_load_program(admin_fd, csf_id, CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_vadd", 0, 0); + csd_admin_activation(admin_fd, csf_id, 1); + + if (pwrite(fd, input, in_bytes, 0) != (ssize_t)in_bytes) { + perror("pwrite nvm"); + exit(EXIT_FAILURE); + } + fsync(fd); + + for (uint32_t i = 0; i < iterations; i++) { + start = monotonic_ns(); + csd_nvm_to_afdm(fd, in_id, 0, 0, + (uint16_t)((in_bytes + 511U) / 512U - 1)); + end = monotonic_ns(); + copy_time += end - start; + + start = monotonic_ns(); + csd_exec_mrs(fd, csf_id, rsid, 0, 0, count, 0); + end = monotonic_ns(); + exec_time += end - start; + + 
start = monotonic_ns(); + csd_read(fd, out_id, 0, output, bytes); + end = monotonic_ns(); + read_time += end - start; + } + + for (uint32_t i = 0; i < count; i++) { + int expected = input[i * 2] + input[i * 2 + 1]; + + if (output[i] != expected) { + fprintf(stderr, "sync breakdown mismatch at %u: got %d expected %d\n", + i, output[i], expected); + exit(EXIT_FAILURE); + } + } + + printf("breakdown nvm_to_afdm bytes=%u iterations=%u avg_ns=%" PRIu64 "\n", + in_bytes, iterations, copy_time / iterations); + printf("breakdown exec bytes=%u iterations=%u avg_ns=%" PRIu64 "\n", + bytes, iterations, exec_time / iterations); + printf("breakdown afdm_read bytes=%u iterations=%u avg_ns=%" PRIu64 "\n", + bytes, iterations, read_time / iterations); + + csd_admin_activation(admin_fd, csf_id, 0); + csd_admin_unload_program(admin_fd, csf_id); + csd_admin_delete_mrs(admin_fd, rsid); + close(admin_fd); + csd_dealloc(fd, in_id); + csd_dealloc(fd, out_id); + + free(input); + free(output); +} + +static void run_so_smoke(const char *dev, int fd, const char *so_path); +static void run_original_so_smoke(const char *dev, int fd, const char *so_path); + +static void run_indirect_vadd(const char *dev, int fd, const char *so_path) +{ + enum { COUNT = 1024 }; + int *input = NULL; + int *output = NULL; + int *global_mem = NULL; + uint32_t in_id; + uint32_t out_id; + uint32_t global_id; + uint16_t rsid; + uint16_t csf_id = 10; + int admin_fd; + struct csd_memory_range ranges[3]; + uint8_t task_info[16 + sizeof(int)] = { 0 }; + + if (posix_memalign((void **)&input, 4096, 8192) || + posix_memalign((void **)&output, 4096, 4096) || + posix_memalign((void **)&global_mem, 4096, 4096)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < COUNT; i++) { + input[i * 2] = i; + input[i * 2 + 1] = i + 7; + output[i] = 0; + } + global_mem[0] = 0; + global_mem[1] = 0; + + in_id = csd_alloc(fd, 8192); + out_id = csd_alloc(fd, 4096); + global_id = csd_alloc(fd, 4096); + csd_write(fd, 
in_id, 0, input, 8192); + csd_write(fd, out_id, 0, output, 4096); + csd_write(fd, global_id, 0, global_mem, 4096); + + memset(ranges, 0, sizeof(ranges)); + ranges[0].nsid = CSD_MR_AFDM_NSID; + ranges[0].sb = out_id; + ranges[1].nsid = CSD_MR_AFDM_NSID; + ranges[1].sb = in_id; + ranges[2].nsid = CSD_MR_AFDM_NSID; + ranges[2].sb = global_id; + + /* + * Original CEMU indirect execute starts with: + * nr_concurrent_chunks, destination, nr_total_input_cf2, nr_total_output_cf2. + * This FDMFS-free smoke uses pre-filled AFDM ranges, so copy-format lists + * are intentionally empty while the indirect CSF ABI is still exercised. + */ + ((int *)task_info)[0] = 1; + ((int *)task_info)[1] = 0; + ((int *)task_info)[2] = 0; + ((int *)task_info)[3] = 0; + + admin_fd = open_admin_from_namespace(dev); + rsid = csd_admin_create_mrs(admin_fd, ranges, 3); + csd_admin_load_program(admin_fd, csf_id, CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_vadd_indirect", + CSD_LOAD_FLAG_INDIRECT, 0); + csd_admin_activation(admin_fd, csf_id, 1); + if (csd_exec_mrs(fd, csf_id, rsid, 0, 0, COUNT, 0) == 0) { + fprintf(stderr, "indirect vadd returned zero blocks\n"); + exit(EXIT_FAILURE); + } + csd_read(fd, out_id, 0, output, 4096); + + for (int i = 0; i < COUNT; i++) { + int expected = input[i * 2] + input[i * 2 + 1]; + + if (output[i] != expected) { + fprintf(stderr, "indirect vadd mismatch at %d: got %d expected %d\n", + i, output[i], expected); + exit(EXIT_FAILURE); + } + } + + csd_admin_activation(admin_fd, csf_id, 0); + csd_admin_unload_program(admin_fd, csf_id); + csd_admin_delete_mrs(admin_fd, rsid); + close(admin_fd); + csd_dealloc(fd, in_id); + csd_dealloc(fd, out_id); + csd_dealloc(fd, global_id); + printf("indirect vadd smoke passed\n"); + + free(input); + free(output); + free(global_mem); +} + +static void run_benchmark_kernels(const char *dev, int fd, const char *vadd_so, + const char *kernels_so, uint32_t iterations) +{ + uint64_t start; + uint64_t end; + + if (iterations == 0) { + 
fprintf(stderr, "benchmark-kernels requires non-zero iterations\n"); + exit(EXIT_FAILURE); + } + + start = monotonic_ns(); + for (uint32_t i = 0; i < iterations; i++) { + run_so_smoke(dev, fd, vadd_so); + } + end = monotonic_ns(); + printf("benchmark-kernel name=vadd iterations=%u avg_ns=%" PRIu64 "\n", + iterations, (end - start) / iterations); + + start = monotonic_ns(); + for (uint32_t i = 0; i < iterations; i++) { + run_original_so_smoke(dev, fd, kernels_so); + } + end = monotonic_ns(); + printf("benchmark-kernel name=knn_sql_grep_lz4 iterations=%u avg_ns=%" PRIu64 "\n", + iterations, (end - start) / iterations); +} + +static void run_original_so_smoke(const char *dev, int fd, const char *so_path) +{ + int admin_fd = open_admin_from_namespace(dev); + uint32_t in_id; + uint32_t out_id; + uint8_t *input = NULL; + uint8_t *output = NULL; + uint32_t pattern_id; + uint8_t *pattern = NULL; + + if (posix_memalign((void **)&input, 4096, 65536) || + posix_memalign((void **)&output, 4096, 65536) || + posix_memalign((void **)&pattern, 4096, 4096)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + + memset(input, 0, 65536); + memset(output, 0, 65536); + memset(pattern, 0, 4096); + + enum { KNN_NODES = 4, KNN_NODE_SIZE = 4160 }; + for (int n = 0; n < KNN_NODES; n++) { + uint8_t *node = input + n * KNN_NODE_SIZE; + + memset(node, 'A' + n, 64); + memset(node + 64, '0' + n, 4096); + } + in_id = csd_alloc(fd, KNN_NODES * KNN_NODE_SIZE); + out_id = csd_alloc(fd, 4096); + csd_write(fd, in_id, 0, input, KNN_NODES * KNN_NODE_SIZE); + csd_admin_load_program(admin_fd, 2, CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_knn", 0, 0); + csd_admin_activation(admin_fd, 2, 1); + csd_exec_ranges(fd, 2, in_id, out_id, 0, 0, 0, 0); + csd_read(fd, out_id, 0, output, 4096); + for (int i = 0; i < KNN_NODES; i++) { + if (((int *)output)[i] < 0) { + fprintf(stderr, "knn smoke invalid distance at %d\n", i); + exit(EXIT_FAILURE); + } + } + csd_admin_activation(admin_fd, 2, 0); + 
csd_admin_unload_program(admin_fd, 2); + csd_dealloc(fd, in_id); + csd_dealloc(fd, out_id); + printf("knn shared-library smoke passed\n"); + + memset(input, 'x', 65536); + memset(output, 0, 65536); + for (int r = 0; r < 8; r++) { + char *record = (char *)input + r * 32; + + memset(record, 'A' + r, 32); + record[30] = '0'; + record[31] = (r % 2) ? ('0' + 55) : ('0' + 70); + } + in_id = csd_alloc(fd, 4096); + out_id = csd_alloc(fd, 4096); + csd_write(fd, in_id, 0, input, 4096); + csd_write(fd, out_id, 0, output, 4096); + csd_admin_load_program(admin_fd, 3, CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_sql", 0, 0); + csd_admin_activation(admin_fd, 3, 1); + if (csd_exec_ranges(fd, 3, in_id, out_id, 0, 0, 50, 60) != 4 * 32) { + fprintf(stderr, "sql smoke unexpected result\n"); + exit(EXIT_FAILURE); + } + csd_admin_activation(admin_fd, 3, 0); + csd_admin_unload_program(admin_fd, 3); + csd_dealloc(fd, in_id); + csd_dealloc(fd, out_id); + printf("sql shared-library smoke passed\n"); + + memset(input, 'Z', 65536); + memcpy(input + 32, "needle", 6); + memcpy(input + 96, "needle", 6); + memcpy(pattern, "needle", 7); + in_id = csd_alloc(fd, 4096); + pattern_id = csd_alloc(fd, 4096); + csd_write(fd, in_id, 0, input, 4096); + csd_write(fd, pattern_id, 0, pattern, 4096); + csd_admin_load_program(admin_fd, 4, CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_grep", 0, 0); + csd_admin_activation(admin_fd, 4, 1); + if (csd_exec_ranges(fd, 4, in_id, pattern_id, 0, 0, 4, 1024) != 16) { + fprintf(stderr, "grep smoke unexpected result\n"); + exit(EXIT_FAILURE); + } + csd_admin_activation(admin_fd, 4, 0); + csd_admin_unload_program(admin_fd, 4); + csd_dealloc(fd, in_id); + csd_dealloc(fd, pattern_id); + printf("grep shared-library smoke passed\n"); + + memset(input, 'L', 4096); + memset(output, 0, 65536); + in_id = csd_alloc(fd, 4096); + out_id = csd_alloc(fd, 8192); + csd_write(fd, in_id, 0, input, 4096); + csd_write(fd, out_id, 0, output, 8192); + csd_admin_load_program(admin_fd, 6, 
CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_lz4", 0, 0); + csd_admin_activation(admin_fd, 6, 1); + if (csd_exec_ranges(fd, 6, in_id, out_id, 0, 0, 0, 0) == 0) { + fprintf(stderr, "lz4 smoke unexpected result\n"); + exit(EXIT_FAILURE); + } + csd_admin_activation(admin_fd, 6, 0); + csd_admin_unload_program(admin_fd, 6); + csd_dealloc(fd, in_id); + csd_dealloc(fd, out_id); + printf("lz4 shared-library smoke passed\n"); + + close(admin_fd); + free(input); + free(output); + free(pattern); +} + +static void run_bench(int fd, uint32_t size, uint32_t iterations) +{ + uint8_t *buf = NULL; + uint8_t *read_buf = NULL; + uint32_t id; + uint64_t start; + uint64_t end; + + if (size == 0 || iterations == 0) { + fprintf(stderr, "bench requires non-zero bytes and iterations\n"); + exit(EXIT_FAILURE); + } + if (posix_memalign((void **)&buf, 4096, (size + 4095U) & ~4095U) || + posix_memalign((void **)&read_buf, 4096, (size + 4095U) & ~4095U)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + memset(buf, 0x5a, (size + 4095U) & ~4095U); + memset(read_buf, 0, (size + 4095U) & ~4095U); + + id = csd_alloc(fd, size); + + start = monotonic_ns(); + for (uint32_t i = 0; i < iterations; i++) { + csd_write(fd, id, 0, buf, size); + } + end = monotonic_ns(); + printf("bench afdm_write bytes=%u iterations=%u avg_ns=%" PRIu64 "\n", + size, iterations, (end - start) / iterations); + + start = monotonic_ns(); + for (uint32_t i = 0; i < iterations; i++) { + csd_read(fd, id, 0, read_buf, size); + } + end = monotonic_ns(); + printf("bench afdm_read bytes=%u iterations=%u avg_ns=%" PRIu64 "\n", + size, iterations, (end - start) / iterations); + + if (pwrite(fd, buf, size, 0) != size) { + perror("pwrite nvm"); + exit(EXIT_FAILURE); + } + fsync(fd); + start = monotonic_ns(); + for (uint32_t i = 0; i < iterations; i++) { + csd_nvm_to_afdm(fd, id, 0, 0, (uint16_t)((size + 511U) / 512U - 1)); + } + end = monotonic_ns(); + printf("bench nvm_to_afdm bytes=%u iterations=%u avg_ns=%" PRIu64 "\n", + size, 
iterations, (end - start) / iterations); + + csd_dealloc(fd, id); + free(buf); + free(read_buf); +} + +int main(int argc, char **argv) +{ + const char *dev; + const char *op; + int fd; + + if (argc < 3) { + usage(argv[0]); + return EXIT_FAILURE; + } + + dev = argv[1]; + op = argv[2]; + fd = open(dev, O_RDWR); + if (fd < 0) { + perror(dev); + return EXIT_FAILURE; + } + + if (!strcmp(op, "smoke")) { + run_smoke(dev, fd); + } else if (!strcmp(op, "smoke-so")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + run_so_smoke(dev, fd, argv[3]); + } else if (!strcmp(op, "smoke-so-all")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + run_original_so_smoke(dev, fd, argv[3]); + } else if (!strcmp(op, "smoke-ubpf")) { + uint8_t jit = 0; + + if (argc < 4 || argc > 5) { + usage(argv[0]); + return EXIT_FAILURE; + } + if (argc == 5) { + jit = (uint8_t)parse_u64(argv[4], "jit"); + } + run_ubpf_smoke(dev, fd, argv[3], jit ? 1 : 0); + } else if (!strcmp(op, "smoke-mrs")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + run_mrs_smoke(dev, fd, argv[3]); + } else if (!strcmp(op, "vadd-example")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + run_vadd_example(dev, fd, argv[3]); + } else if (!strcmp(op, "sync-breakdown")) { + if (argc != 6) { + usage(argv[0]); + return EXIT_FAILURE; + } + run_sync_breakdown(dev, fd, argv[3], + (uint32_t)parse_u64(argv[4], "bytes"), + (uint32_t)parse_u64(argv[5], "iterations")); + } else if (!strcmp(op, "indirect-vadd")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + run_indirect_vadd(dev, fd, argv[3]); + } else if (!strcmp(op, "benchmark-kernels")) { + if (argc != 6) { + usage(argv[0]); + return EXIT_FAILURE; + } + run_benchmark_kernels(dev, fd, argv[3], argv[4], + (uint32_t)parse_u64(argv[5], "iterations")); + } else if (!strcmp(op, "bench")) { + if (argc != 5) { + usage(argv[0]); + return EXIT_FAILURE; + } + run_bench(fd, (uint32_t)parse_u64(argv[3], 
"bytes"), + (uint32_t)parse_u64(argv[4], "iterations")); + } else if (!strcmp(op, "alloc")) { + uint64_t size; + uint32_t id; + + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + size = parse_u64(argv[3], "bytes"); + id = csd_alloc(fd, size); + printf("%" PRIu32 "\n", id); + } else if (!strcmp(op, "dealloc")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_dealloc(fd, (uint32_t)parse_u64(argv[3], "id")); + } else if (!strcmp(op, "admin-load-so")) { + uint32_t runtime = 0; + + if (argc < 6 || argc > 7) { + usage(argv[0]); + return EXIT_FAILURE; + } + if (argc == 7) { + runtime = (uint32_t)parse_u64(argv[6], "runtime-ns"); + } + csd_admin_load_program(fd, (uint16_t)parse_u64(argv[3], "pind"), + CSD_CSF_TYPE_SHARED_LIB, argv[4], argv[5], + 0, runtime); + } else if (!strcmp(op, "admin-load-ubpf")) { + uint32_t runtime = 0; + uint8_t jit = 0; + + if (argc < 6 || argc > 8) { + usage(argv[0]); + return EXIT_FAILURE; + } + if (argc >= 7) { + jit = (uint8_t)parse_u64(argv[6], "jit"); + } + if (argc == 8) { + runtime = (uint32_t)parse_u64(argv[7], "runtime-ns"); + } + csd_admin_load_program(fd, (uint16_t)parse_u64(argv[3], "pind"), + CSD_CSF_TYPE_EBPF, argv[4], argv[5], + jit ? 
1 : 0, runtime); + } else if (!strcmp(op, "admin-load-phantom")) { + if (argc != 5) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_admin_load_program(fd, (uint16_t)parse_u64(argv[3], "pind"), + CSD_CSF_TYPE_PHANTOM, NULL, NULL, 0, + (uint32_t)parse_u64(argv[4], "runtime-ns")); + } else if (!strcmp(op, "admin-activate")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_admin_activation(fd, (uint16_t)parse_u64(argv[3], "pind"), 1); + } else if (!strcmp(op, "admin-deactivate")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_admin_activation(fd, (uint16_t)parse_u64(argv[3], "pind"), 0); + } else if (!strcmp(op, "admin-unload")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_admin_unload_program(fd, (uint16_t)parse_u64(argv[3], "pind")); + } else if (!strcmp(op, "admin-create-mrs")) { + struct csd_memory_range ranges[2]; + uint16_t rsid; + + if (argc != 5) { + usage(argv[0]); + return EXIT_FAILURE; + } + + memset(ranges, 0, sizeof(ranges)); + ranges[0].nsid = CSD_MR_AFDM_NSID; + ranges[0].sb = (uint32_t)parse_u64(argv[3], "out-afdm-id"); + ranges[1].nsid = CSD_MR_AFDM_NSID; + ranges[1].sb = (uint32_t)parse_u64(argv[4], "in-afdm-id"); + rsid = csd_admin_create_mrs(fd, ranges, 2); + printf("%" PRIu16 "\n", rsid); + } else if (!strcmp(op, "admin-delete-mrs")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_admin_delete_mrs(fd, (uint16_t)parse_u64(argv[3], "rsid")); + } else if (!strcmp(op, "exec")) { + uint32_t runtime = 0; + uint32_t group_id = 0; + uint64_t cparam1 = 0; + uint64_t cparam2 = 0; + + if (argc < 6 || argc > 10) { + usage(argv[0]); + return EXIT_FAILURE; + } + if (argc >= 7) { + runtime = (uint32_t)parse_u64(argv[6], "runtime-ns"); + } + if (argc >= 8) { + group_id = (uint32_t)parse_u64(argv[7], "group-id"); + } + if (argc >= 9) { + cparam1 = parse_u64(argv[8], "cparam1"); + } + if (argc == 10) { + cparam2 = parse_u64(argv[9], "cparam2"); + } + csd_exec(fd, 
(uint32_t)parse_u64(argv[3], "pind"), + (uint32_t)parse_u64(argv[4], "in-afdm-id"), + (uint32_t)parse_u64(argv[5], "out-afdm-id"), + runtime, group_id, cparam1, cparam2); + } else if (!strcmp(op, "create-group")) { + uint32_t id; + + if (argc != 6) { + usage(argv[0]); + return EXIT_FAILURE; + } + id = csd_create_group(fd, (int8_t)parse_u64(argv[3], "prio"), + (uint32_t)parse_u64(argv[4], "bandwidth-kb"), + (uint32_t)parse_u64(argv[5], "deadline-us")); + printf("%" PRIu32 "\n", id); + } else if (!strcmp(op, "set-qos")) { + if (argc != 7) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_set_qos(fd, (uint32_t)parse_u64(argv[3], "group-id"), + (int8_t)parse_u64(argv[4], "prio"), + (uint32_t)parse_u64(argv[5], "bandwidth-kb"), + (uint32_t)parse_u64(argv[6], "deadline-us")); + } else if (!strcmp(op, "delete-group")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_delete_group(fd, (uint32_t)parse_u64(argv[3], "group-id")); + } else if (!strcmp(op, "write")) { + if (argc != 6) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_write(fd, (uint32_t)parse_u64(argv[3], "id"), + parse_u64(argv[4], "offset"), argv[5], + (uint32_t)strlen(argv[5]) + 1); + } else if (!strcmp(op, "read")) { + uint64_t size64; + uint32_t size; + void *buf = NULL; + + if (argc != 6) { + usage(argv[0]); + return EXIT_FAILURE; + } + size64 = parse_u64(argv[5], "bytes"); + if (size64 > UINT32_MAX) { + fprintf(stderr, "read size exceeds UINT32_MAX\n"); + return EXIT_FAILURE; + } + size = (uint32_t)size64; + if (posix_memalign(&buf, 4096, (size + 4095) & ~4095U)) { + perror("posix_memalign"); + return EXIT_FAILURE; + } + memset(buf, 0, (size + 4095) & ~4095U); + csd_read(fd, (uint32_t)parse_u64(argv[3], "id"), + parse_u64(argv[4], "offset"), buf, size); + dump_hex(buf, size); + free(buf); + } else if (!strcmp(op, "nvm-to-afdm")) { + if (argc != 7) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_nvm_to_afdm(fd, (uint32_t)parse_u64(argv[3], "id"), + parse_u64(argv[4], 
"offset"), + parse_u64(argv[5], "slba"), + (uint16_t)parse_u64(argv[6], "nlb")); + } else { + usage(argv[0]); + return EXIT_FAILURE; + } + + close(fd); + return EXIT_SUCCESS; +}
diff --git a/tests/femu-csd/csd-vadd.bpf.c b/tests/femu-csd/csd-vadd.bpf.c
new file mode 100644
index 00000000000..6e29bdcfad2
--- /dev/null
+++ b/tests/femu-csd/csd-vadd.bpf.c
@@ -0,0 +1,37 @@
+#include "femu-csd-kernel.h"
+
+/*
+ * csd_vadd_bpf - BPF variant of the pairwise-add test kernel.
+ *
+ * Memory range 1 is the input and memory range 0 the output; for each i in
+ * [0, cparam1) it stores out[i] = in[2 * i] + in[2 * i + 1].
+ *
+ * A count of zero or less is rejected, and the count is not checked against
+ * the range lengths, so the caller must size both ranges accordingly.
+ *
+ * Returns the number of sums written, or -1 when fewer than two memory ranges
+ * are supplied or cparam1 <= 0.
+ */
+long long csd_vadd_bpf(struct femu_csd_args *args)
+{
+    int *in;
+    int *out;
+    long long count;
+
+    if (args->numr < 2) {
+        return -1;
+    }
+
+    in = args->mr_addr[1];
+    out = args->mr_addr[0];
+    count = args->cparam1;
+    if (count <= 0) {
+        return -1;
+    }
+
+    for (long long i = 0; i < count; i++) {
+        out[i] = in[i * 2] + in[i * 2 + 1];
+    }
+
+    return count;
+}
diff --git a/tests/femu-csd/csd-vadd.c b/tests/femu-csd/csd-vadd.c
new file mode 100644
index 00000000000..4d8f4efbc57
--- /dev/null
+++ b/tests/femu-csd/csd-vadd.c
@@ -0,0 +1,122 @@
+#include <limits.h>
+#include <stdint.h>
+#include <string.h>
+#include "femu-csd-kernel.h"
+
+/* Number of ints that fit in 512 bytes. */
+#define CSD_VADD_INTS_PER_512B (512 / (int)sizeof(int))
+
+/*
+ * csd_vadd - pairwise-add test kernel.
+ *
+ * Memory range 1 is the input and memory range 0 the output; for each i in
+ * [0, count) it stores out[i] = in[2 * i] + in[2 * i + 1].
+ *
+ * count is cparam1.  When cparam1 is 0, the count is derived from the range
+ * lengths in mr_len (bytes) as the largest count both ranges can hold.  An
+ * explicit count is not checked against mr_len.
+ *
+ * Returns the number of sums written, or -1 when fewer than two memory ranges
+ * are supplied or the count is negative.
+ */
+int64_t csd_vadd(struct femu_csd_args *args)
+{
+    int *out;
+    int *in;
+    long long count;
+
+    /* Check numr before indexing mr_addr[], as the other kernels do. */
+    if (args->numr < 2) {
+        return -1;
+    }
+
+    out = args->mr_addr[0];
+    in = args->mr_addr[1];
+    count = args->cparam1;
+
+    if (count == 0) {
+        long long out_count = args->mr_len[0] / (long long)sizeof(*out);
+        long long in_count = args->mr_len[1] / (2 * (long long)sizeof(*in));
+
+        count = out_count < in_count ? out_count : in_count;
+    }
+
+    if (count < 0) {
+        return -1;
+    }
+
+    for (long long i = 0; i < count; i++) {
+        out[i] = in[i * 2] + in[i * 2 + 1];
+    }
+
+    return count;
+}
+
+/*
+ * csd_vadd_indirect - pairwise-add test kernel that appends at an output
+ * position kept in a third memory range.
+ *
+ * Memory ranges: 0 = output, 1 = input, 2 = state.  On return state[0] is
+ * the append position in output and state[1] is that position rounded down
+ * to a multiple of CSD_VADD_INTS_PER_512B, both counted in ints.
+ *
+ * If both state values are positive on entry, (state[0] - state[1]) ints are
+ * first moved to the start of output and the position is rebased by
+ * state[1].  Then cparam1 sums input[2 * i] + input[2 * i + 1] are appended
+ * and the state is written back.
+ *
+ * NOTE(review): the move copies from input + state[1], not from
+ * output + state[1]; confirm that this is the intended source buffer.
+ *
+ * Returns the number of complete 512-byte units below the new position, or
+ * -1 when fewer than three memory ranges are supplied, cparam1 is negative,
+ * or the state is unusable (negative position, a positive state[1] larger
+ * than a positive state[0], or a position that would overflow int).
+ */
+int64_t csd_vadd_indirect(struct femu_csd_args *args)
+{
+    int *output;
+    int *input;
+    int *global_mem;
+    long long count = args->cparam1;
+    int pos;
+    int start_loc;
+    int base;
+
+    if (args->numr < 3 || count < 0) {
+        return -1;
+    }
+
+    output = args->mr_addr[0];
+    input = args->mr_addr[1];
+    global_mem = args->mr_addr[2];
+    pos = global_mem[0];
+    start_loc = global_mem[1];
+
+    /* Append position once the optional move below has rebased it. */
+    base = (start_loc > 0 && pos > 0) ? pos - start_loc : pos;
+
+    /*
+     * The state is read back from a memory range, so validate it before
+     * use: a negative base means pos < 0 or start_loc > pos, which would
+     * index output below zero or turn the memmove length into a huge
+     * size_t; an oversized count would overflow pos while appending.
+     */
+    if (base < 0 || count > (long long)INT_MAX - base) {
+        return -1;
+    }
+
+    if (start_loc > 0 && pos > 0) {
+        memmove(output, input + start_loc, (size_t)base * sizeof(int));
+    }
+    pos = base;
+
+    for (long long i = 0; i < count; i++) {
+        output[pos++] = input[i * 2] + input[i * 2 + 1];
+    }
+
+    global_mem[1] = (pos / CSD_VADD_INTS_PER_512B) * CSD_VADD_INTS_PER_512B;
+    global_mem[0] = pos;
+
+    return global_mem[1] / CSD_VADD_INTS_PER_512B;
+}
diff --git a/tests/femu-csd/femu-csd-kernel.h b/tests/femu-csd/femu-csd-kernel.h
new file mode 100644
index 00000000000..9b50ca89e0b
--- /dev/null
+++ b/tests/femu-csd/femu-csd-kernel.h
@@ -0,0 +1,21 @@
+#ifndef FEMU_CSD_KERNEL_H
+#define FEMU_CSD_KERNEL_H
+
+/*
+ * Argument block passed to the test CSF kernels in this directory.
+ *
+ * The struct is packed, so mr_addr sits directly after numr with no padding.
+ * NOTE(review): presumably this layout must match the one the device side
+ * fills in -- confirm before reordering or retyping members.
+ */
+struct femu_csd_args {
+    int numr;              /* number of memory ranges */
+    void **mr_addr;        /* mr_addr[i]: base address of range i */
+    long long *mr_len;     /* mr_len[i]: length of range i in bytes */
+    long long cparam1;     /* kernel-defined parameter; vadd: element count */
+    long long cparam2;     /* second parameter; unused by the vadd kernels */
+    void *data_buffer;     /* not used by the vadd kernels */
+    long long buffer_len;  /* not used by the vadd kernels */
+} __attribute__((packed));
+
+#endif