From 7f585cbab12ccd4ca2f4691aa576d45c34bce93d Mon Sep 17 00:00:00 2001 From: Emilio597 <857005703@qq.com> Date: Mon, 25 May 2026 11:35:50 +0800 Subject: [PATCH 01/10] hw/femu: add experimental CSD mode --- hw/femu/backend/dram.c | 3 +- hw/femu/csd/csd.c | 598 +++++++++++++++++++++++++++++++++++++++++ hw/femu/csd/csd.h | 176 ++++++++++++ hw/femu/femu.c | 10 + hw/femu/meson.build | 2 +- hw/femu/nvme.h | 17 ++ 6 files changed, 804 insertions(+), 2 deletions(-) create mode 100644 hw/femu/csd/csd.c create mode 100644 hw/femu/csd/csd.h diff --git a/hw/femu/backend/dram.c b/hw/femu/backend/dram.c index a58068634c9..c34dc663b72 100644 --- a/hw/femu/backend/dram.c +++ b/hw/femu/backend/dram.c @@ -57,7 +57,8 @@ int backend_rw(SsdDramBackend *b, QEMUSGList *qsg, uint64_t *lbal, bool is_write mb_oft = lbal[sg_cur_index]; } else if (b->femu_mode == FEMU_BBSSD_MODE || b->femu_mode == FEMU_NOSSD_MODE || - b->femu_mode == FEMU_ZNSSD_MODE) { + b->femu_mode == FEMU_ZNSSD_MODE || + b->femu_mode == FEMU_CSD_MODE) { mb_oft += cur_len; } else { assert(0); diff --git a/hw/femu/csd/csd.c b/hw/femu/csd/csd.c new file mode 100644 index 00000000000..7134f5e7334 --- /dev/null +++ b/hw/femu/csd/csd.c @@ -0,0 +1,598 @@ +#include "qemu/osdep.h" +#include "qapi/error.h" + +#include "csd.h" + +typedef struct FemuCsdAfdm { + uint32_t id; + uint64_t size; + uint8_t *data; +} FemuCsdAfdm; + +typedef struct FemuCsdProgram { + uint32_t id; + uint8_t type; + uint32_t runtime; + uint16_t runtime_scale; + uint64_t size; + uint8_t *data; +} FemuCsdProgram; + +typedef struct FemuCsdGroup { + uint32_t id; + int8_t prio; + uint8_t qos_flags; + uint32_t bandwidth; + uint32_t deadline; +} FemuCsdGroup; + +typedef struct FemuCsdState { + CsdCtrlParams params; + uint64_t fdm_capacity; + uint64_t fdm_used; + uint32_t next_afdm_id; + uint32_t next_csf_id; + uint32_t next_group_id; + GHashTable *afdms; + GHashTable *programs; + GHashTable *groups; + QemuMutex lock; +} FemuCsdState; + +static void 
csd_check_size(void) +{ + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdDownloadCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdAllocFdmCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdDeallocAfdmCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdNvmToAfdmCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdExecCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdReadAfdmCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdWriteAfdmCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdCreateGroupCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdSetQosCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdDeleteGroupCmd) != 64); +} + +static FemuCsdState *csd_state(FemuCtrl *n) +{ + return n->ext_ops.state; +} + +static void csd_afdm_free(gpointer opaque) +{ + FemuCsdAfdm *afdm = opaque; + + if (!afdm) { + return; + } + + g_free(afdm->data); + g_free(afdm); +} + +static void csd_program_free(gpointer opaque) +{ + FemuCsdProgram *program = opaque; + + if (!program) { + return; + } + + g_free(program->data); + g_free(program); +} + +static void csd_init_ctrl_str(FemuCtrl *n) +{ + static int csd_id; + const char *mn = "FEMU Computational Storage Controller"; + const char *sn = "vCSD"; + + nvme_set_ctrl_name(n, mn, sn, &csd_id); +} + +static void csd_init(FemuCtrl *n, Error **errp) +{ + FemuCsdState *csd; + + csd_check_size(); + + if (n->csd_params.fdm_size_mb == 0) { + error_setg(errp, "CSD mode requires fdm_size to be non-zero"); + return; + } + + if (n->csd_params.fdm_size_mb > UINT64_MAX / MiB) { + error_setg(errp, "CSD fdm_size is too large"); + return; + } + + if (n->csd_params.nr_cu == 0 || n->csd_params.nr_cu > 64) { + error_setg(errp, "CSD nr_cu must be in range [1, 64]"); + return; + } + + if (n->csd_params.nr_thread == 0) { + error_setg(errp, "CSD nr_thread must be non-zero"); + return; + } + + if (n->csd_params.csf_runtime_scale == 0) { + error_setg(errp, "CSD csf_runtime_scale must be non-zero"); + return; + } + + csd_init_ctrl_str(n); + + csd = g_new0(FemuCsdState, 1); + csd->params = n->csd_params; + 
csd->fdm_capacity = n->csd_params.fdm_size_mb * MiB; + csd->next_afdm_id = 1; + csd->next_csf_id = 1; + csd->next_group_id = 1; + csd->afdms = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, + csd_afdm_free); + csd->programs = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, + csd_program_free); + csd->groups = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, + g_free); + qemu_mutex_init(&csd->lock); + n->ext_ops.state = csd; + + femu_log("%s,CSD mode initialized: fdm=%" PRIu64 "MB, " + "nr_cu=%u, nr_thread=%u\n", + n->devname, csd->params.fdm_size_mb, csd->params.nr_cu, + csd->params.nr_thread); +} + +static void csd_exit(FemuCtrl *n) +{ + FemuCsdState *csd = csd_state(n); + + if (!csd) { + return; + } + + g_hash_table_destroy(csd->afdms); + g_hash_table_destroy(csd->programs); + g_hash_table_destroy(csd->groups); + qemu_mutex_destroy(&csd->lock); + g_free(csd); + n->ext_ops.state = NULL; +} + +static FemuCsdProgram *csd_get_program_locked(FemuCsdState *csd, uint32_t id) +{ + if (id == 0) { + return NULL; + } + + return g_hash_table_lookup(csd->programs, GUINT_TO_POINTER(id)); +} + +static FemuCsdAfdm *csd_get_afdm_locked(FemuCsdState *csd, uint32_t id) +{ + if (id == 0) { + return NULL; + } + + return g_hash_table_lookup(csd->afdms, GUINT_TO_POINTER(id)); +} + +static FemuCsdGroup *csd_get_group_locked(FemuCsdState *csd, uint32_t id) +{ + if (id == 0) { + return NULL; + } + + return g_hash_table_lookup(csd->groups, GUINT_TO_POINTER(id)); +} + +static uint16_t csd_check_afdm_range(FemuCsdAfdm *afdm, uint64_t offset, + uint64_t size) +{ + if (!afdm || size == 0 || offset > afdm->size || + size > afdm->size - offset) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (size > UINT32_MAX) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static uint16_t csd_download(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdDownloadCmd *download = (NvmeCsdDownloadCmd *)cmd; + 
uint64_t size = le64_to_cpu(download->size); + uint64_t prp1 = le64_to_cpu(download->prp1); + uint64_t prp2 = le64_to_cpu(download->prp2); + FemuCsdProgram *program; + uint32_t id; + uint16_t status = NVME_SUCCESS; + + if (download->csf_type != NVME_CSD_CSF_TYPE_PHANTOM || + size > UINT32_MAX) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + program = g_new0(FemuCsdProgram, 1); + program->type = download->csf_type; + program->runtime = le32_to_cpu(download->runtime); + program->runtime_scale = le16_to_cpu(download->runtime_scale); + program->size = size; + + if (size) { + program->data = g_malloc0(size); + status = dma_write_prp(n, program->data, size, prp1, prp2); + if (status) { + csd_program_free(program); + return status | NVME_DNR; + } + } + + qemu_mutex_lock(&csd->lock); + id = csd->next_csf_id++; + if (id == 0) { + csd->next_csf_id = 1; + id = csd->next_csf_id++; + } + program->id = id; + g_hash_table_insert(csd->programs, GUINT_TO_POINTER(id), program); + qemu_mutex_unlock(&csd->lock); + + req->cqe.n.result = id; + return NVME_SUCCESS; +} + +static uint16_t csd_exec(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdExecCmd *exec = (NvmeCsdExecCmd *)cmd; + uint32_t csf_id = le32_to_cpu(exec->csf_id); + uint32_t in_id = le32_to_cpu(exec->in_afdm_id); + uint32_t out_id = le32_to_cpu(exec->out_afdm_id); + uint32_t group_id = le32_to_cpu(exec->group); + uint32_t runtime = le32_to_cpu(exec->runtime); + FemuCsdProgram *program; + FemuCsdAfdm *in; + FemuCsdAfdm *out; + uint64_t copy_size; + + qemu_mutex_lock(&csd->lock); + program = csd_get_program_locked(csd, csf_id); + if (!program) { + qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (group_id != 0 && !csd_get_group_locked(csd, group_id)) { + qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (in_id != 0 || out_id != 0) { + in = csd_get_afdm_locked(csd, in_id); + out = csd_get_afdm_locked(csd, out_id); + 
if (!in || !out) { + qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } + + copy_size = MIN(in->size, out->size); + memmove(out->data, in->data, copy_size); /* in_id may equal out_id, so the buffers can alias */ + req->cqe.n.result = copy_size > UINT32_MAX ? UINT32_MAX : copy_size; + } else { + req->cqe.n.result = 0; + } + + if (runtime == 0) { + runtime = program->runtime; + } + qemu_mutex_unlock(&csd->lock); + + if (runtime) { + req->reqlat += runtime; + req->expire_time += runtime; + } + + return NVME_SUCCESS; +} + +static uint16_t csd_normalize_prio(int8_t *prio) +{ + if (*prio == 0) { + *prio = 5; + } + + if (*prio < 1 || *prio > 9) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static uint16_t csd_create_group(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdCreateGroupCmd *create = (NvmeCsdCreateGroupCmd *)cmd; + FemuCsdGroup *group; + uint32_t id; + int8_t prio = create->prio; + uint16_t status; + + status = csd_normalize_prio(&prio); + if (status) { + return status; + } + + group = g_new0(FemuCsdGroup, 1); + group->prio = prio; + group->qos_flags = create->qos_flags; + group->bandwidth = le32_to_cpu(create->bandwidth); + group->deadline = le32_to_cpu(create->deadline); + + qemu_mutex_lock(&csd->lock); + id = csd->next_group_id++; + if (id == 0) { + csd->next_group_id = 1; + id = csd->next_group_id++; + } + group->id = id; + g_hash_table_insert(csd->groups, GUINT_TO_POINTER(id), group); + qemu_mutex_unlock(&csd->lock); + + req->cqe.n.result = id; + return NVME_SUCCESS; +} + +static uint16_t csd_set_qos(FemuCtrl *n, NvmeCmd *cmd) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdSetQosCmd *set = (NvmeCsdSetQosCmd *)cmd; + uint32_t id = le32_to_cpu(set->id); + int8_t prio = set->prio; + FemuCsdGroup *group; + uint16_t status; + + status = csd_normalize_prio(&prio); + if (status) { + return status; + } + + qemu_mutex_lock(&csd->lock); + group = csd_get_group_locked(csd, id); + if (!group) { +
qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } + + group->prio = prio; + group->qos_flags = set->qos_flags; + group->bandwidth = le32_to_cpu(set->bandwidth); + group->deadline = le32_to_cpu(set->deadline); + qemu_mutex_unlock(&csd->lock); + + return NVME_SUCCESS; +} + +static uint16_t csd_delete_group(FemuCtrl *n, NvmeCmd *cmd) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdDeleteGroupCmd *delete = (NvmeCsdDeleteGroupCmd *)cmd; + uint32_t id = le32_to_cpu(delete->id); + + qemu_mutex_lock(&csd->lock); + if (!csd_get_group_locked(csd, id)) { + qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } + + g_hash_table_remove(csd->groups, GUINT_TO_POINTER(id)); + qemu_mutex_unlock(&csd->lock); + + return NVME_SUCCESS; +} + +static uint16_t csd_alloc_fdm(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdAllocFdmCmd *alloc = (NvmeCsdAllocFdmCmd *)cmd; + FemuCsdAfdm *afdm; + uint64_t size = le64_to_cpu(alloc->size); + uint32_t id; + + if (alloc->type != NVME_CSD_FDM_TYPE_HOST || size == 0) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + qemu_mutex_lock(&csd->lock); + if (size > csd->fdm_capacity - csd->fdm_used) { + qemu_mutex_unlock(&csd->lock); + return NVME_CAP_EXCEEDED | NVME_DNR; + } + + id = csd->next_afdm_id++; + if (id == 0) { + csd->next_afdm_id = 1; + id = csd->next_afdm_id++; + } + + afdm = g_new0(FemuCsdAfdm, 1); + afdm->id = id; + afdm->size = size; + afdm->data = g_malloc0(size); + + g_hash_table_insert(csd->afdms, GUINT_TO_POINTER(id), afdm); + csd->fdm_used += size; + qemu_mutex_unlock(&csd->lock); + + req->cqe.n.result = id; + return NVME_SUCCESS; +} + +static uint16_t csd_dealloc_afdm(FemuCtrl *n, NvmeCmd *cmd) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdDeallocAfdmCmd *dealloc = (NvmeCsdDeallocAfdmCmd *)cmd; + uint32_t id = le32_to_cpu(dealloc->id); + FemuCsdAfdm *afdm; + + qemu_mutex_lock(&csd->lock); + afdm = csd_get_afdm_locked(csd, id); + if (!afdm) { + 
qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } + + csd->fdm_used -= afdm->size; + g_hash_table_remove(csd->afdms, GUINT_TO_POINTER(id)); + qemu_mutex_unlock(&csd->lock); + + return NVME_SUCCESS; +} + +static uint16_t csd_read_afdm(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdReadAfdmCmd *read = (NvmeCsdReadAfdmCmd *)cmd; + uint32_t id = le32_to_cpu(read->id); + uint64_t offset = le64_to_cpu(read->offset); + uint64_t size = le64_to_cpu(read->size); + uint64_t prp1 = le64_to_cpu(read->prp1); + uint64_t prp2 = le64_to_cpu(read->prp2); + FemuCsdAfdm *afdm; + uint16_t status; + + qemu_mutex_lock(&csd->lock); + afdm = csd_get_afdm_locked(csd, id); + status = csd_check_afdm_range(afdm, offset, size); + if (!status) { + status = dma_read_prp(n, afdm->data + offset, size, prp1, prp2); + } + qemu_mutex_unlock(&csd->lock); + + if (status) { + return status | NVME_DNR; + } + + req->cqe.n.result = size; + return NVME_SUCCESS; +} + +static uint16_t csd_write_afdm(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdWriteAfdmCmd *write = (NvmeCsdWriteAfdmCmd *)cmd; + uint32_t id = le32_to_cpu(write->id); + uint64_t offset = le64_to_cpu(write->offset); + uint64_t size = le64_to_cpu(write->size); + uint64_t prp1 = le64_to_cpu(write->prp1); + uint64_t prp2 = le64_to_cpu(write->prp2); + FemuCsdAfdm *afdm; + uint16_t status; + + qemu_mutex_lock(&csd->lock); + afdm = csd_get_afdm_locked(csd, id); + status = csd_check_afdm_range(afdm, offset, size); + if (!status) { + status = dma_write_prp(n, afdm->data + offset, size, prp1, prp2); + } + qemu_mutex_unlock(&csd->lock); + + if (status) { + return status | NVME_DNR; + } + + req->cqe.n.result = size; + return NVME_SUCCESS; +} + +static uint16_t csd_nvm_to_afdm(FemuCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, + NvmeRequest *req) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdNvmToAfdmCmd *copy = (NvmeCsdNvmToAfdmCmd *)cmd; + 
const uint8_t lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas); + const uint8_t data_shift = ns->id_ns.lbaf[lba_index].lbads; + uint32_t id = le32_to_cpu(copy->id); + uint64_t offset = le64_to_cpu(copy->offset); + uint64_t slba = le64_to_cpu(copy->slba); + uint64_t nlb = le16_to_cpu(copy->nlb) + 1; + uint64_t size = nlb << data_shift; + uint64_t nvm_offset = slba << data_shift; + FemuCsdAfdm *afdm; + uint16_t status; + + if (slba + nlb > le64_to_cpu(ns->id_ns.nsze) || + nvm_offset > n->mbe->size || size > n->mbe->size - nvm_offset) { + return NVME_LBA_RANGE | NVME_DNR; + } + + qemu_mutex_lock(&csd->lock); + afdm = csd_get_afdm_locked(csd, id); + status = csd_check_afdm_range(afdm, offset, size); + if (!status) { + memcpy(afdm->data + offset, + (uint8_t *)n->mbe->logical_space + nvm_offset, size); + } + qemu_mutex_unlock(&csd->lock); + + if (status) { + return status; + } + + req->cqe.n.result = size; + return NVME_SUCCESS; +} + +static uint16_t csd_io_cmd(FemuCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, + NvmeRequest *req) +{ + switch (cmd->opcode) { + case NVME_CMD_READ: + case NVME_CMD_WRITE: + return nvme_rw(n, ns, cmd, req); + case NVME_CMD_CSD_DOWNLOAD: + return csd_download(n, cmd, req); + case NVME_CMD_CSD_ALLOC_FDM: + return csd_alloc_fdm(n, cmd, req); + case NVME_CMD_CSD_DEALLOC_AFDM: + return csd_dealloc_afdm(n, cmd); + case NVME_CMD_CSD_NVM_TO_AFDM: + return csd_nvm_to_afdm(n, ns, cmd, req); + case NVME_CMD_CSD_EXEC: + return csd_exec(n, cmd, req); + case NVME_CMD_CSD_READ_AFDM: + return csd_read_afdm(n, cmd, req); + case NVME_CMD_CSD_WRITE_AFDM: + return csd_write_afdm(n, cmd, req); + case NVME_CMD_CSD_CREATE_GROUP: + return csd_create_group(n, cmd, req); + case NVME_CMD_CSD_SET_QOS: + return csd_set_qos(n, cmd); + case NVME_CMD_CSD_DELETE_GROUP: + return csd_delete_group(n, cmd); + default: + return NVME_INVALID_OPCODE | NVME_DNR; + } +} + +int nvme_register_csd(FemuCtrl *n) +{ + n->ext_ops = (FemuExtCtrlOps) { + .state = NULL, + .init = csd_init, + 
.exit = csd_exit, + .rw_check_req = NULL, + .start_ctrl = NULL, + .admin_cmd = NULL, + .io_cmd = csd_io_cmd, + .get_log = NULL, + }; + + return 0; +} diff --git a/hw/femu/csd/csd.h b/hw/femu/csd/csd.h new file mode 100644 index 00000000000..01294bd3c7b --- /dev/null +++ b/hw/femu/csd/csd.h @@ -0,0 +1,176 @@ +#ifndef FEMU_CSD_H +#define FEMU_CSD_H + +#include "../nvme.h" + +enum FemuCsdIoCommands { + NVME_CMD_CSD_DOWNLOAD = 0xa1, + NVME_CMD_CSD_ALLOC_FDM = 0xb0, + NVME_CMD_CSD_DEALLOC_AFDM = 0xc0, + NVME_CMD_CSD_NVM_TO_AFDM = 0xd0, + NVME_CMD_CSD_EXEC = 0xe0, + NVME_CMD_CSD_READ_AFDM = 0xf2, + NVME_CMD_CSD_WRITE_AFDM = 0xf5, + NVME_CMD_CSD_CREATE_GROUP = 0xf6, + NVME_CMD_CSD_SET_QOS = 0xf7, + NVME_CMD_CSD_DELETE_GROUP = 0xf8, +}; + +enum FemuCsdFdmType { + NVME_CSD_FDM_TYPE_HOST = 0, +}; + +enum FemuCsdCsfType { + NVME_CSD_CSF_TYPE_PHANTOM = 0, + NVME_CSD_CSF_TYPE_EBPF = 1, + NVME_CSD_CSF_TYPE_BITSTREAM = 2, + NVME_CSD_CSF_TYPE_SHARED_LIB = 3, +}; + +typedef struct QEMU_PACKED NvmeCsdDownloadCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint64_t size; + uint8_t csf_type; + uint8_t csf_flags; + uint16_t runtime_scale; + uint32_t runtime; + uint32_t rsvd15[2]; +} NvmeCsdDownloadCmd; + +typedef struct QEMU_PACKED NvmeCsdAllocFdmCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint64_t size; + uint8_t type; + uint8_t rsvd14[7]; + uint64_t rsvd15; +} NvmeCsdAllocFdmCmd; + +typedef struct QEMU_PACKED NvmeCsdDeallocAfdmCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint32_t id; + uint32_t rsvd11[5]; +} NvmeCsdDeallocAfdmCmd; + +typedef struct QEMU_PACKED NvmeCsdNvmToAfdmCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint64_t slba; + uint16_t 
nlb; + uint16_t rsvd12; + uint32_t id; + uint64_t offset; +} NvmeCsdNvmToAfdmCmd; + +typedef struct QEMU_PACKED NvmeCsdExecCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint32_t csf_id; + uint32_t in_afdm_id; + uint32_t out_afdm_id; + uint32_t group; + uint32_t rsvd14; + uint32_t runtime; +} NvmeCsdExecCmd; + +typedef struct QEMU_PACKED NvmeCsdReadAfdmCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint64_t offset; + uint64_t size; + uint32_t id; + uint32_t rsvd15; +} NvmeCsdReadAfdmCmd; + +typedef struct QEMU_PACKED NvmeCsdWriteAfdmCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint64_t offset; + uint64_t size; + uint32_t id; + uint32_t rsvd15; +} NvmeCsdWriteAfdmCmd; + +typedef struct QEMU_PACKED NvmeCsdCreateGroupCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + int8_t prio; + uint8_t qos_flags; + uint16_t rsvd10; + uint32_t bandwidth; + uint32_t deadline; + uint32_t rsvd14[3]; +} NvmeCsdCreateGroupCmd; + +typedef struct QEMU_PACKED NvmeCsdSetQosCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + int8_t prio; + uint8_t qos_flags; + uint16_t rsvd10; + uint32_t bandwidth; + uint32_t deadline; + uint32_t id; + uint32_t rsvd15[2]; +} NvmeCsdSetQosCmd; + +typedef struct QEMU_PACKED NvmeCsdDeleteGroupCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t rsvd2[2]; + uint64_t prp1; + uint64_t prp2; + uint32_t id; + uint32_t rsvd11[5]; +} NvmeCsdDeleteGroupCmd; + +#endif diff --git a/hw/femu/femu.c b/hw/femu/femu.c index f0cd531ea11..248a2e694e7 100644 --- a/hw/femu/femu.c +++ b/hw/femu/femu.c @@ -802,6 +802,8 @@ static int 
nvme_register_extensions(FemuCtrl *n) nvme_register_bbssd(n); } else if (ZNSSD(n)) { nvme_register_znssd(n); + } else if (CSD(n)) { + nvme_register_csd(n); } else { /* TODO: For future extensions */ } @@ -962,6 +964,14 @@ static const Property femu_props[] = { DEFINE_PROP_UINT8("lnum_lun", FemuCtrl, oc_params.num_lun, 8), DEFINE_PROP_UINT8("lnum_pln", FemuCtrl, oc_params.num_pln, 2), DEFINE_PROP_UINT16("lmetasize", FemuCtrl, oc_params.sos, 16), + DEFINE_PROP_UINT64("fdm_size", FemuCtrl, csd_params.fdm_size_mb, 0), + DEFINE_PROP_UINT8("nr_cu", FemuCtrl, csd_params.nr_cu, 4), + DEFINE_PROP_UINT8("nr_thread", FemuCtrl, csd_params.nr_thread, 4), + DEFINE_PROP_UINT64("time_slice", FemuCtrl, csd_params.time_slice, 200000), + DEFINE_PROP_UINT64("context_switch_time", FemuCtrl, + csd_params.context_switch_time, 200), + DEFINE_PROP_UINT16("csf_runtime_scale", FemuCtrl, + csd_params.csf_runtime_scale, 3), DEFINE_PROP_UINT8("zns_num_ch", FemuCtrl, zns_params.zns_num_ch, 2), DEFINE_PROP_UINT8("zns_num_lun", FemuCtrl, zns_params.zns_num_lun, 4), DEFINE_PROP_UINT8("zns_num_plane", FemuCtrl, zns_params.zns_num_plane, 2), diff --git a/hw/femu/meson.build b/hw/femu/meson.build index 984e48d53f1..5f0c7dc2ac1 100644 --- a/hw/femu/meson.build +++ b/hw/femu/meson.build @@ -1 +1 @@ -system_ss.add(when: 'CONFIG_FEMU_PCI', if_true: files('dma.c', 'intr.c', 'nvme-util.c', 'nvme-admin.c', 'nvme-io.c', 'femu.c', 'nossd/nop.c', 'nand/nand.c', 'timing-model/timing.c', 'ocssd/oc12.c', 'ocssd/oc20.c', 'zns/zns.c', 'zns/zftl.c','bbssd/bb.c', 'bbssd/ftl.c', 'lib/pqueue.c', 'lib/rte_ring.c', 'backend/dram.c')) +system_ss.add(when: 'CONFIG_FEMU_PCI', if_true: files('dma.c', 'intr.c', 'nvme-util.c', 'nvme-admin.c', 'nvme-io.c', 'femu.c', 'nossd/nop.c', 'nand/nand.c', 'timing-model/timing.c', 'ocssd/oc12.c', 'ocssd/oc20.c', 'zns/zns.c', 'zns/zftl.c','bbssd/bb.c', 'bbssd/ftl.c', 'csd/csd.c', 'lib/pqueue.c', 'lib/rte_ring.c', 'backend/dram.c')) diff --git a/hw/femu/nvme.h b/hw/femu/nvme.h index 
901cf7e5018..c8ff6fc4466 100644 --- a/hw/femu/nvme.h +++ b/hw/femu/nvme.h @@ -1531,6 +1531,15 @@ typedef struct ZNSCtrlParams { int zns_flash_type; } ZNSCtrlParams; +typedef struct CsdCtrlParams { + uint64_t fdm_size_mb; + uint8_t nr_cu; + uint8_t nr_thread; + uint64_t time_slice; + uint64_t context_switch_time; + uint16_t csf_runtime_scale; +} CsdCtrlParams; + typedef struct OcCtrlParams { uint16_t sec_size; uint8_t secs_per_pg; @@ -1680,6 +1689,7 @@ typedef struct FemuCtrl { uint8_t lver; /* Coperd: OCSSD version, 0x1 -> OC1.2, 0x2 -> OC2.0 */ uint32_t memsz; OcCtrlParams oc_params; + CsdCtrlParams csd_params; Oc12Ctrl *oc12_ctrl; volatile int64_t chip_next_avail_time[FEMU_MAX_NUM_CHIPS]; @@ -1745,6 +1755,7 @@ enum { FEMU_BBSSD_MODE = 1, FEMU_NOSSD_MODE = 2, FEMU_ZNSSD_MODE = 3, + FEMU_CSD_MODE = 4, FEMU_SMARTSSD_MODE, FEMU_KVSSD_MODE, }; @@ -1779,6 +1790,11 @@ static inline bool ZNSSD(FemuCtrl *n) return (n->femu_mode == FEMU_ZNSSD_MODE); } +static inline bool CSD(FemuCtrl *n) +{ + return (n->femu_mode == FEMU_CSD_MODE); +} + /* Basic NVMe Queue Pair operation APIs from nvme-util.c */ int nvme_check_sqid(FemuCtrl *n, uint16_t sqid); int nvme_check_cqid(FemuCtrl *n, uint16_t cqid); @@ -1847,6 +1863,7 @@ int nvme_register_ocssd20(FemuCtrl *n); int nvme_register_nossd(FemuCtrl *n); int nvme_register_bbssd(FemuCtrl *n); int nvme_register_znssd(FemuCtrl *n); +int nvme_register_csd(FemuCtrl *n); static inline uint64_t ns_blks(NvmeNamespace *ns, uint8_t lba_idx) { From 92b38a08e2080e3e4c5d6ce5ddb7098e55e8b693 Mon Sep 17 00:00:00 2001 From: Emilio597 <857005703@qq.com> Date: Mon, 25 May 2026 11:37:43 +0800 Subject: [PATCH 02/10] tests/femu-csd: add CSD passthrough tests and run script --- README.md | 35 +++ femu-scripts/femu-copy-scripts.sh | 3 +- femu-scripts/run-csd.sh | 56 ++++ tests/femu-csd/Makefile | 14 + tests/femu-csd/README.md | 45 +++ tests/femu-csd/csd-passthru.c | 441 ++++++++++++++++++++++++++++++ 6 files changed, 592 insertions(+), 2 deletions(-) create 
mode 100755 femu-scripts/run-csd.sh create mode 100644 tests/femu-csd/Makefile create mode 100644 tests/femu-csd/README.md create mode 100644 tests/femu-csd/csd-passthru.c diff --git a/README.md b/README.md index fc8e2c9e359..36078aaef18 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ - [WhiteBox SSD Mode (OCSSD)](#whitebox-ssd-mode-ocssd) - [Zoned Namespace SSD Mode (ZNSSD)](#zoned-namespace-ssd-mode-znssd) - [NoSSD Mode](#nossd-mode) + - [Computational Storage Mode (CSD)](#computational-storage-mode-csd) - [Configuration](#configuration) - [Development](#development) - [Troubleshooting](#troubleshooting) @@ -367,6 +368,37 @@ Ultra-fast NVMe emulation without storage logic. - Performance upper-bound testing - Fast storage prototyping +### Computational Storage Mode (CSD) + +Experimental computational storage support derived from CEMU. CSD is selected +with `femu_mode=4` and keeps CSD-specific code under `hw/femu/csd/`. + +```bash +./run-csd.sh +``` + +**Key Parameters:** +```bash +fdm_size=64 # Functional data memory size (MB), required +nr_cu=4 # Number of compute units +nr_thread=4 # Number of functional simulation threads +time_slice=200000 # Scheduler time slice (ns) +context_switch_time=200 # Context switch time (ns) +csf_runtime_scale=3 # Runtime scaling factor +``` + +**Current Scope:** +- Normal NVMe read/write compatibility in CSD mode +- Vendor commands for AFDM allocation, read/write, NVM-to-AFDM copy +- Minimal phantom CSF download/execute path +- Group/QoS command metadata +- Guest-side passthrough tests in `tests/femu-csd/` + +The initial CSD path does not require a CEMU-specific Linux kernel, FDMFS, or a +fixed VM image. Advanced CEMU features such as VM freezing, virtual clock +changes, full eBPF execution, and FDMFS are intentionally kept out of the +default path while the base mode is upstreamed. 
+ --- ## Configuration @@ -451,6 +483,9 @@ hw/femu/ # Main FEMU implementation │ └── zftl.c # Zone-based FTL ├── nossd/ # NoSSD mode │ └── nop.c # Minimal processing +├── csd/ # Computational Storage mode +│ ├── csd.c # CSD command handling +│ └── csd.h # CSD private command definitions ├── timing-model/ # Performance modeling ├── backend/ # Storage backends └── lib/ # Utility libraries diff --git a/femu-scripts/femu-copy-scripts.sh b/femu-scripts/femu-copy-scripts.sh index 8e49e319186..598a956688a 100755 --- a/femu-scripts/femu-copy-scripts.sh +++ b/femu-scripts/femu-copy-scripts.sh @@ -4,7 +4,7 @@ FSD="../femu-scripts" -CPL=(pkgdep.sh femu-compile.sh run-whitebox.sh run-blackbox.sh run-blackbox-fdp.sh run-nossd.sh run-zns.sh pin.sh ftk) +CPL=(pkgdep.sh femu-compile.sh run-whitebox.sh run-blackbox.sh run-blackbox-fdp.sh run-nossd.sh run-zns.sh run-csd.sh pin.sh ftk) echo "" echo "==> Copying following FEMU script to current directory:" @@ -18,4 +18,3 @@ do done echo "Done!" echo "" - diff --git a/femu-scripts/run-csd.sh b/femu-scripts/run-csd.sh new file mode 100755 index 00000000000..8dc7fa98e3c --- /dev/null +++ b/femu-scripts/run-csd.sh @@ -0,0 +1,56 @@ +#!/bin/bash +# Run FEMU as a Computational Storage Drive (CSD) + +# Image directory +IMGDIR=$HOME/images +# Virtual machine disk image +OSIMGF=$IMGDIR/u20s.qcow2 + +# FEMU CSD parameters +SSD_SIZE_MB=4096 +FDM_SIZE_MB=64 +NR_CU=4 +NR_THREAD=4 +TIME_SLICE=200000 +CONTEXT_SWITCH_TIME=200 +CSF_RUNTIME_SCALE=3 + +#----------------------------------------------------------------------- + +# Compose the entire FEMU CSD command line options +FEMU_OPTIONS="-device femu" +FEMU_OPTIONS=${FEMU_OPTIONS}",devsz_mb=${SSD_SIZE_MB}" +FEMU_OPTIONS=${FEMU_OPTIONS}",namespaces=1" +FEMU_OPTIONS=${FEMU_OPTIONS}",femu_mode=4" +FEMU_OPTIONS=${FEMU_OPTIONS}",fdm_size=${FDM_SIZE_MB}" +FEMU_OPTIONS=${FEMU_OPTIONS}",nr_cu=${NR_CU}" +FEMU_OPTIONS=${FEMU_OPTIONS}",nr_thread=${NR_THREAD}" 
+FEMU_OPTIONS=${FEMU_OPTIONS}",time_slice=${TIME_SLICE}" +FEMU_OPTIONS=${FEMU_OPTIONS}",context_switch_time=${CONTEXT_SWITCH_TIME}" +FEMU_OPTIONS=${FEMU_OPTIONS}",csf_runtime_scale=${CSF_RUNTIME_SCALE}" + +echo ${FEMU_OPTIONS} + +if [[ ! -e "$OSIMGF" ]]; then + echo "" + echo "VM disk image couldn't be found ..." + echo "Please prepare a usable VM image and place it as $OSIMGF" + echo "Once VM disk image is ready, please rerun this script again" + echo "" + exit 1 # report the missing image as a failure instead of status 0 +fi + +sudo ./qemu-system-x86_64 \ + -name "FEMU-CSD-VM" \ + -enable-kvm \ + -cpu host \ + -smp 4 \ + -m 4G \ + -device virtio-scsi-pci,id=scsi0 \ + -device scsi-hd,drive=hd0 \ + -drive file=$OSIMGF,if=none,aio=native,cache=none,format=qcow2,id=hd0 \ + ${FEMU_OPTIONS} \ + -net user,hostfwd=tcp::8080-:22 \ + -net nic,model=virtio \ + -nographic \ + -qmp unix:./qmp-sock,server,nowait 2>&1 | tee log diff --git a/tests/femu-csd/Makefile b/tests/femu-csd/Makefile new file mode 100644 index 00000000000..0d79875336d --- /dev/null +++ b/tests/femu-csd/Makefile @@ -0,0 +1,14 @@ +CC ?= gcc +CFLAGS ?= -Wall -Wextra -O2 -g + +TARGETS := csd-passthru + +.PHONY: all clean + +all: $(TARGETS) + +csd-passthru: csd-passthru.c + $(CC) $(CFLAGS) -o $@ $< + +clean: + rm -f $(TARGETS) diff --git a/tests/femu-csd/README.md b/tests/femu-csd/README.md new file mode 100644 index 00000000000..47a0a47f0f3 --- /dev/null +++ b/tests/femu-csd/README.md @@ -0,0 +1,45 @@ +# FEMU CSD Passthrough Tests + +This directory contains lightweight guest-side tools for validating FEMU CSD +vendor commands without `linux-cemu`, FDMFS, or a fixed VM image.
+ +Build inside a normal Linux guest: + +```bash +make +``` + +Run a basic AFDM smoke test against a namespace device: + +```bash +sudo ./csd-passthru /dev/nvme0n1 smoke +``` + +The smoke test sends these CSD commands through `NVME_IOCTL_IO_CMD`: + +- allocate AFDM +- write AFDM +- read AFDM +- download a phantom CSF +- execute the phantom CSF +- deallocate AFDM + +Other useful command-level checks: + +```bash +sudo ./csd-passthru /dev/nvme0n1 alloc 4096 +sudo ./csd-passthru /dev/nvme0n1 download-phantom 1000 +sudo ./csd-passthru /dev/nvme0n1 create-group 5 0 0 +sudo ./csd-passthru /dev/nvme0n1 set-qos 6 0 0 +sudo ./csd-passthru /dev/nvme0n1 exec 0 +sudo ./csd-passthru /dev/nvme0n1 delete-group +sudo ./csd-passthru /dev/nvme0n1 nvm-to-afdm 0 0 0 +``` + +The tool assumes FEMU was started with CSD mode enabled, for example: + +```bash +-device femu,femu_mode=4,fdm_size=64 +``` + +It intentionally does not depend on CEMU's modified kernel driver or FDMFS. diff --git a/tests/femu-csd/csd-passthru.c b/tests/femu-csd/csd-passthru.c new file mode 100644 index 00000000000..50761955678 --- /dev/null +++ b/tests/femu-csd/csd-passthru.c @@ -0,0 +1,441 @@ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum { + CSD_CMD_DOWNLOAD = 0xa1, + CSD_CMD_ALLOC_FDM = 0xb0, + CSD_CMD_DEALLOC_AFDM = 0xc0, + CSD_CMD_NVM_TO_AFDM = 0xd0, + CSD_CMD_EXEC = 0xe0, + CSD_CMD_READ_AFDM = 0xf2, + CSD_CMD_WRITE_AFDM = 0xf5, + CSD_CMD_CREATE_GROUP = 0xf6, + CSD_CMD_SET_QOS = 0xf7, + CSD_CMD_DELETE_GROUP = 0xf8, +}; + +enum { + CSD_CSF_TYPE_PHANTOM = 0, +}; + +static void usage(const char *prog) +{ + fprintf(stderr, + "Usage:\n" + " %s /dev/nvmeXnY smoke\n" + " %s /dev/nvmeXnY alloc \n" + " %s /dev/nvmeXnY dealloc \n" + " %s /dev/nvmeXnY download-phantom \n" + " %s /dev/nvmeXnY exec [runtime-ns] [group-id]\n" + " %s /dev/nvmeXnY create-group \n" + " %s /dev/nvmeXnY set-qos \n" + " %s /dev/nvmeXnY delete-group \n" +
" %s /dev/nvmeXnY write \n" + " %s /dev/nvmeXnY read \n" + " %s /dev/nvmeXnY nvm-to-afdm \n", + prog, prog, prog, prog, prog, prog, prog, prog, prog, prog, prog); +} + +static uint64_t parse_u64(const char *s, const char *name) +{ + char *end = NULL; + uint64_t v; + + errno = 0; + v = strtoull(s, &end, 0); + if (errno || end == s || *end) { /* end == s: no digits consumed (e.g. empty argument) */ + fprintf(stderr, "invalid %s: %s\n", name, s); + exit(EXIT_FAILURE); + } + + return v; +} + +static int submit(int fd, struct nvme_passthru_cmd *cmd) +{ + int ret = ioctl(fd, NVME_IOCTL_IO_CMD, cmd); + + if (ret < 0) { + perror("NVME_IOCTL_IO_CMD"); + return -1; + } + + return ret; +} + +static uint32_t csd_download_phantom(int fd, uint32_t runtime) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_DOWNLOAD, + .nsid = 1, + .cdw12 = CSD_CSF_TYPE_PHANTOM, + .cdw13 = runtime, + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } + + return cmd.result; +} + +static void csd_exec(int fd, uint32_t csf_id, uint32_t in_afdm_id, + uint32_t out_afdm_id, uint32_t runtime, uint32_t group_id) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_EXEC, + .nsid = 1, + .cdw10 = csf_id, + .cdw11 = in_afdm_id, + .cdw12 = out_afdm_id, + .cdw13 = group_id, + .cdw14 = 0, + .cdw15 = runtime, + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static uint32_t csd_create_group(int fd, int8_t prio, uint32_t bandwidth, + uint32_t deadline) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_CREATE_GROUP, + .nsid = 1, + .cdw10 = (uint8_t)prio, + .cdw11 = bandwidth, + .cdw12 = deadline, + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } + + return cmd.result; +} + +static void csd_set_qos(int fd, uint32_t group_id, int8_t prio, + uint32_t bandwidth, uint32_t deadline) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_SET_QOS, + .nsid = 1, + .cdw10 = (uint8_t)prio, + .cdw11 = bandwidth, + .cdw12 = deadline, + .cdw13 = group_id, + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static void
csd_delete_group(int fd, uint32_t group_id) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_DELETE_GROUP, + .nsid = 1, + .cdw10 = group_id, + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static uint32_t csd_alloc(int fd, uint64_t size) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_ALLOC_FDM, + .nsid = 1, + .cdw10 = (uint32_t)size, + .cdw11 = (uint32_t)(size >> 32), + .cdw12 = 0, + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } + + return cmd.result; +} + +static void csd_dealloc(int fd, uint32_t id) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_DEALLOC_AFDM, + .nsid = 1, + .cdw10 = id, + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static void csd_write(int fd, uint32_t id, uint64_t offset, const void *buf, + uint32_t size) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_WRITE_AFDM, + .nsid = 1, + .addr = (uintptr_t)buf, + .data_len = size, + .cdw10 = (uint32_t)offset, + .cdw11 = (uint32_t)(offset >> 32), + .cdw12 = size, + .cdw13 = 0, + .cdw14 = id, + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static void csd_read(int fd, uint32_t id, uint64_t offset, void *buf, + uint32_t size) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_READ_AFDM, + .nsid = 1, + .addr = (uintptr_t)buf, + .data_len = size, + .cdw10 = (uint32_t)offset, + .cdw11 = (uint32_t)(offset >> 32), + .cdw12 = size, + .cdw13 = 0, + .cdw14 = id, + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static void csd_nvm_to_afdm(int fd, uint32_t id, uint64_t offset, + uint64_t slba, uint16_t nlb) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_NVM_TO_AFDM, + .nsid = 1, + .cdw10 = (uint32_t)slba, + .cdw11 = (uint32_t)(slba >> 32), + .cdw12 = nlb, + .cdw13 = id, + .cdw14 = (uint32_t)offset, + .cdw15 = (uint32_t)(offset >> 32), + }; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static void dump_hex(const uint8_t *buf, size_t size) +{ + for (size_t i = 0; i < 
size; i++) { + printf("%02x%s", buf[i], (i + 1) % 16 == 0 ? "\n" : " "); + } + if (size % 16) { + printf("\n"); + } +} + +static void run_smoke(int fd) +{ + const char *msg = "femu-csd-afdm-smoke"; + size_t msg_len = strlen(msg) + 1; + uint8_t *write_buf = NULL; + uint8_t *read_buf = NULL; + uint32_t id; + uint32_t csf_id; + + if (posix_memalign((void **)&write_buf, 4096, 4096) || + posix_memalign((void **)&read_buf, 4096, 4096)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + + memset(write_buf, 0, 4096); + memset(read_buf, 0, 4096); + memcpy(write_buf, msg, msg_len); + + id = csd_alloc(fd, 4096); + printf("allocated AFDM id=%" PRIu32 "\n", id); + + csd_write(fd, id, 0, write_buf, 4096); + csd_read(fd, id, 0, read_buf, 4096); + + if (memcmp(write_buf, read_buf, 4096)) { + fprintf(stderr, "AFDM smoke mismatch\n"); + exit(EXIT_FAILURE); + } + + csf_id = csd_download_phantom(fd, 1000); + printf("downloaded phantom CSF id=%" PRIu32 "\n", csf_id); + csd_exec(fd, csf_id, id, id, 0, 0); + printf("phantom exec passed\n"); + + csd_dealloc(fd, id); + printf("AFDM smoke passed\n"); + + free(write_buf); + free(read_buf); +} + +int main(int argc, char **argv) +{ + const char *dev; + const char *op; + int fd; + + if (argc < 3) { + usage(argv[0]); + return EXIT_FAILURE; + } + + dev = argv[1]; + op = argv[2]; + fd = open(dev, O_RDWR); + if (fd < 0) { + perror(dev); + return EXIT_FAILURE; + } + + if (!strcmp(op, "smoke")) { + run_smoke(fd); + } else if (!strcmp(op, "alloc")) { + uint64_t size; + uint32_t id; + + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + size = parse_u64(argv[3], "bytes"); + id = csd_alloc(fd, size); + printf("%" PRIu32 "\n", id); + } else if (!strcmp(op, "dealloc")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_dealloc(fd, (uint32_t)parse_u64(argv[3], "id")); + } else if (!strcmp(op, "download-phantom")) { + uint32_t id; + + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + id = 
csd_download_phantom(fd, (uint32_t)parse_u64(argv[3], "runtime-ns")); + printf("%" PRIu32 "\n", id); + } else if (!strcmp(op, "exec")) { + uint32_t runtime = 0; + + uint32_t group_id = 0; + + if (argc < 6 || argc > 8) { + usage(argv[0]); + return EXIT_FAILURE; + } + if (argc >= 7) { + runtime = (uint32_t)parse_u64(argv[6], "runtime-ns"); + } + if (argc == 8) { + group_id = (uint32_t)parse_u64(argv[7], "group-id"); + } + csd_exec(fd, (uint32_t)parse_u64(argv[3], "csf-id"), + (uint32_t)parse_u64(argv[4], "in-afdm-id"), + (uint32_t)parse_u64(argv[5], "out-afdm-id"), + runtime, group_id); + } else if (!strcmp(op, "create-group")) { + uint32_t id; + + if (argc != 6) { + usage(argv[0]); + return EXIT_FAILURE; + } + id = csd_create_group(fd, (int8_t)parse_u64(argv[3], "prio"), + (uint32_t)parse_u64(argv[4], "bandwidth-kb"), + (uint32_t)parse_u64(argv[5], "deadline-us")); + printf("%" PRIu32 "\n", id); + } else if (!strcmp(op, "set-qos")) { + if (argc != 7) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_set_qos(fd, (uint32_t)parse_u64(argv[3], "group-id"), + (int8_t)parse_u64(argv[4], "prio"), + (uint32_t)parse_u64(argv[5], "bandwidth-kb"), + (uint32_t)parse_u64(argv[6], "deadline-us")); + } else if (!strcmp(op, "delete-group")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_delete_group(fd, (uint32_t)parse_u64(argv[3], "group-id")); + } else if (!strcmp(op, "write")) { + if (argc != 6) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_write(fd, (uint32_t)parse_u64(argv[3], "id"), + parse_u64(argv[4], "offset"), argv[5], + (uint32_t)strlen(argv[5]) + 1); + } else if (!strcmp(op, "read")) { + uint64_t size64; + uint32_t size; + void *buf = NULL; + + if (argc != 6) { + usage(argv[0]); + return EXIT_FAILURE; + } + size64 = parse_u64(argv[5], "bytes"); + if (size64 > UINT32_MAX) { + fprintf(stderr, "read size exceeds UINT32_MAX\n"); + return EXIT_FAILURE; + } + size = (uint32_t)size64; + if (posix_memalign(&buf, 4096, (size + 4095) & ~4095U)) 
{ + perror("posix_memalign"); + return EXIT_FAILURE; + } + memset(buf, 0, (size + 4095) & ~4095U); + csd_read(fd, (uint32_t)parse_u64(argv[3], "id"), + parse_u64(argv[4], "offset"), buf, size); + dump_hex(buf, size); + free(buf); + } else if (!strcmp(op, "nvm-to-afdm")) { + if (argc != 7) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_nvm_to_afdm(fd, (uint32_t)parse_u64(argv[3], "id"), + parse_u64(argv[4], "offset"), + parse_u64(argv[5], "slba"), + (uint16_t)parse_u64(argv[6], "nlb")); + } else { + usage(argv[0]); + return EXIT_FAILURE; + } + + close(fd); + return EXIT_SUCCESS; +} From 7181c4067328704a06de0143d49c962c868025d6 Mon Sep 17 00:00:00 2001 From: Emilio597 <857005703@qq.com> Date: Mon, 25 May 2026 20:40:37 +0800 Subject: [PATCH 03/10] hw/femu: add CSD shared library execution --- README.md | 6 +- femu-scripts/femu-compile.sh | 16 +- hw/femu/csd/csd.c | 267 +++++++++++++++++++++++++++++-- hw/femu/csd/csd.h | 12 +- hw/femu/meson.build | 11 +- meson.build | 3 + meson_options.txt | 2 + scripts/meson-buildoptions.sh | 3 + tests/femu-csd/Makefile | 6 +- tests/femu-csd/README.md | 28 +++- tests/femu-csd/csd-passthru.c | 153 ++++++++++++++++-- tests/femu-csd/csd-vadd.c | 19 +++ tests/femu-csd/femu-csd-kernel.h | 14 ++ 13 files changed, 507 insertions(+), 33 deletions(-) create mode 100644 tests/femu-csd/csd-vadd.c create mode 100644 tests/femu-csd/femu-csd-kernel.h diff --git a/README.md b/README.md index 36078aaef18..375a54e0aa3 100644 --- a/README.md +++ b/README.md @@ -391,13 +391,15 @@ csf_runtime_scale=3 # Runtime scaling factor - Normal NVMe read/write compatibility in CSD mode - Vendor commands for AFDM allocation, read/write, NVM-to-AFDM copy - Minimal phantom CSF download/execute path +- Shared-library CSF download/execute path using a host-visible `.so` +- Optional uBPF CSF support via `./femu-compile.sh --enable-csd-ubpf` - Group/QoS command metadata - Guest-side passthrough tests in `tests/femu-csd/` The initial CSD path does not require a 
CEMU-specific Linux kernel, FDMFS, or a fixed VM image. Advanced CEMU features such as VM freezing, virtual clock -changes, full eBPF execution, and FDMFS are intentionally kept out of the -default path while the base mode is upstreamed. +changes, and FDMFS are intentionally kept out of the default path while the base +mode is upstreamed. --- diff --git a/femu-scripts/femu-compile.sh b/femu-scripts/femu-compile.sh index 8cd710cc20d..73698fb9907 100755 --- a/femu-scripts/femu-compile.sh +++ b/femu-scripts/femu-compile.sh @@ -1,10 +1,24 @@ #!/bin/bash NRCPUS="$(cat /proc/cpuinfo | grep "vendor_id" | wc -l)" +FEMU_CONFIGURE_OPTS="" + +for arg in "$@"; do + case "$arg" in + --enable-csd-ubpf) + FEMU_CONFIGURE_OPTS="${FEMU_CONFIGURE_OPTS} --enable-femu-csd-ubpf" + ;; + *) + echo "Unknown option: $arg" + echo "Usage: $0 [--enable-csd-ubpf]" + exit 1 + ;; + esac +done make clean # --disable-werror --extra-cflags=-w --disable-git-update -../configure --enable-kvm --target-list=x86_64-softmmu --enable-slirp +../configure --enable-kvm --target-list=x86_64-softmmu --enable-slirp ${FEMU_CONFIGURE_OPTS} make -j $NRCPUS echo "" diff --git a/hw/femu/csd/csd.c b/hw/femu/csd/csd.c index 7134f5e7334..5e915993fbe 100644 --- a/hw/femu/csd/csd.c +++ b/hw/femu/csd/csd.c @@ -1,8 +1,15 @@ #include "qemu/osdep.h" #include "qapi/error.h" +#include <gmodule.h> #include "csd.h" +#ifdef CONFIG_FEMU_CSD_UBPF +#include <ubpf.h> +#endif + +typedef int64_t (*FemuCsdSharedLibFn)(FemuCsdArgs *args); + typedef struct FemuCsdAfdm { uint32_t id; uint64_t size; @@ -16,6 +23,12 @@ typedef struct FemuCsdProgram { uint16_t runtime_scale; uint64_t size; uint8_t *data; + GModule *module; + FemuCsdSharedLibFn shared_lib_fn; +#ifdef CONFIG_FEMU_CSD_UBPF + struct ubpf_vm *ubpf_vm; + ubpf_jit_fn ubpf_jit_fn; +#endif } FemuCsdProgram; typedef struct FemuCsdGroup { @@ -70,6 +83,23 @@ static void csd_afdm_free(gpointer opaque) g_free(afdm); } +static void csd_program_unload(FemuCsdProgram *program) +{ + if (program->module) { +
g_module_close(program->module); + program->module = NULL; + program->shared_lib_fn = NULL; + } + +#ifdef CONFIG_FEMU_CSD_UBPF + if (program->ubpf_vm) { + ubpf_destroy(program->ubpf_vm); + program->ubpf_vm = NULL; + program->ubpf_jit_fn = NULL; + } +#endif +} + static void csd_program_free(gpointer opaque) { FemuCsdProgram *program = opaque; @@ -78,6 +108,7 @@ static void csd_program_free(gpointer opaque) return; } + csd_program_unload(program); g_free(program->data); g_free(program); } @@ -203,6 +234,119 @@ static uint16_t csd_check_afdm_range(FemuCsdAfdm *afdm, uint64_t offset, return NVME_SUCCESS; } +static uint16_t csd_parse_program(FemuCsdProgram *program, const char **path, + const char **symbol) +{ + char *name; + size_t path_len; + size_t symbol_len; + + if (!program->data || program->size < 3) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + name = memchr(program->data, '\0', program->size); + if (!name || name == (char *)program->data) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + path_len = name - (char *)program->data; + if (path_len + 1 >= program->size) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + *path = (const char *)program->data; + *symbol = name + 1; + symbol_len = strnlen(*symbol, program->size - path_len - 1); + if (symbol_len == 0 || path_len + symbol_len + 2 > program->size) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + return NVME_SUCCESS; +} + +static uint16_t csd_load_shared_lib(FemuCsdProgram *program) +{ + const char *path; + const char *symbol; + gpointer fn = NULL; + uint16_t status; + + status = csd_parse_program(program, &path, &symbol); + if (status) { + return status; + } + + program->module = g_module_open(path, G_MODULE_BIND_LOCAL); + if (!program->module) { + femu_err("CSD: failed to load shared library %s: %s\n", path, + g_module_error()); + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (!g_module_symbol(program->module, symbol, &fn) || !fn) { + femu_err("CSD: failed to find shared library symbol %s: %s\n", 
symbol, + g_module_error()); + csd_program_unload(program); + return NVME_INVALID_FIELD | NVME_DNR; + } + + program->shared_lib_fn = (FemuCsdSharedLibFn)fn; + return NVME_SUCCESS; +} + +static uint16_t csd_load_ubpf(FemuCsdProgram *program, bool jit) +{ +#ifdef CONFIG_FEMU_CSD_UBPF + const char *path; + const char *symbol; + g_autofree char *elf = NULL; + gsize elf_size = 0; + g_autoptr(GError) err = NULL; + char *errmsg = NULL; + uint16_t status; + + status = csd_parse_program(program, &path, &symbol); + if (status) { + return status; + } + + if (!g_file_get_contents(path, &elf, &elf_size, &err)) { + femu_err("CSD: failed to read uBPF program %s: %s\n", path, + err ? err->message : "unknown error"); + return NVME_INVALID_FIELD | NVME_DNR; + } + + program->ubpf_vm = ubpf_create(); + if (!program->ubpf_vm) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (ubpf_load_elf(program->ubpf_vm, elf, elf_size, symbol, &errmsg) < 0) { + femu_err("CSD: failed to load uBPF ELF %s:%s: %s\n", path, symbol, + errmsg ? errmsg : "unknown error"); + free(errmsg); + csd_program_unload(program); + return NVME_INVALID_FIELD | NVME_DNR; + } + + if (jit) { + program->ubpf_jit_fn = ubpf_compile(program->ubpf_vm, &errmsg); + if (!program->ubpf_jit_fn) { + femu_err("CSD: failed to JIT uBPF ELF %s:%s: %s\n", path, symbol, + errmsg ? 
errmsg : "unknown error"); + free(errmsg); + csd_program_unload(program); + return NVME_INVALID_FIELD | NVME_DNR; + } + } + + return NVME_SUCCESS; +#else + return NVME_INVALID_FIELD | NVME_DNR; +#endif +} + static uint16_t csd_download(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { FemuCsdState *csd = csd_state(n); @@ -214,8 +358,16 @@ static uint16_t csd_download(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) uint32_t id; uint16_t status = NVME_SUCCESS; - if (download->csf_type != NVME_CSD_CSF_TYPE_PHANTOM || - size > UINT32_MAX) { + if (size > UINT32_MAX) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + switch (download->csf_type) { + case NVME_CSD_CSF_TYPE_PHANTOM: + case NVME_CSD_CSF_TYPE_EBPF: + case NVME_CSD_CSF_TYPE_SHARED_LIB: + break; + default: return NVME_INVALID_FIELD | NVME_DNR; } @@ -234,6 +386,24 @@ static uint16_t csd_download(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) } } + switch (program->type) { + case NVME_CSD_CSF_TYPE_PHANTOM: + break; + case NVME_CSD_CSF_TYPE_SHARED_LIB: + status = csd_load_shared_lib(program); + break; + case NVME_CSD_CSF_TYPE_EBPF: + status = csd_load_ubpf(program, download->csf_flags & 0x1); + break; + default: + status = NVME_INVALID_FIELD | NVME_DNR; + break; + } + if (status) { + csd_program_free(program); + return status; + } + qemu_mutex_lock(&csd->lock); id = csd->next_csf_id++; if (id == 0) { @@ -256,11 +426,17 @@ static uint16_t csd_exec(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) uint32_t in_id = le32_to_cpu(exec->in_afdm_id); uint32_t out_id = le32_to_cpu(exec->out_afdm_id); uint32_t group_id = le32_to_cpu(exec->group); + uint32_t cparam1 = le32_to_cpu(exec->cparam1); uint32_t runtime = le32_to_cpu(exec->runtime); FemuCsdProgram *program; - FemuCsdAfdm *in; - FemuCsdAfdm *out; + FemuCsdAfdm *in = NULL; + FemuCsdAfdm *out = NULL; uint64_t copy_size; + void *mr_addr[2] = { 0 }; + long long mr_len[2] = { 0 }; + FemuCsdArgs args = { 0 }; + int64_t result = 0; + uint16_t status = NVME_SUCCESS; 
qemu_mutex_lock(&csd->lock); program = csd_get_program_locked(csd, csf_id); @@ -274,26 +450,83 @@ static uint16_t csd_exec(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) return NVME_INVALID_FIELD | NVME_DNR; } - if (in_id != 0 || out_id != 0) { - in = csd_get_afdm_locked(csd, in_id); - out = csd_get_afdm_locked(csd, out_id); - if (!in || !out) { - qemu_mutex_unlock(&csd->lock); - return NVME_INVALID_FIELD | NVME_DNR; - } - - copy_size = MIN(in->size, out->size); - memcpy(out->data, in->data, copy_size); - req->cqe.n.result = copy_size > UINT32_MAX ? UINT32_MAX : copy_size; - } else { - req->cqe.n.result = 0; + in = in_id ? csd_get_afdm_locked(csd, in_id) : NULL; + out = out_id ? csd_get_afdm_locked(csd, out_id) : NULL; + if ((in_id && !in) || (out_id && !out)) { + qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; } if (runtime == 0) { runtime = program->runtime; } + + switch (program->type) { + case NVME_CSD_CSF_TYPE_PHANTOM: + if (in && out) { + copy_size = MIN(in->size, out->size); + memcpy(out->data, in->data, copy_size); + result = copy_size > UINT32_MAX ? 
UINT32_MAX : copy_size; + } + break; + case NVME_CSD_CSF_TYPE_SHARED_LIB: + if (!program->shared_lib_fn || !in || !out) { + status = NVME_INVALID_FIELD | NVME_DNR; + break; + } + mr_addr[0] = out->data; + mr_addr[1] = in->data; + mr_len[0] = out->size; + mr_len[1] = in->size; + args.numr = 2; + args.mr_addr = mr_addr; + args.mr_len = mr_len; + args.cparam1 = cparam1; + result = program->shared_lib_fn(&args); + break; + case NVME_CSD_CSF_TYPE_EBPF: +#ifdef CONFIG_FEMU_CSD_UBPF + if (!program->ubpf_vm || !in || !out) { + status = NVME_INVALID_FIELD | NVME_DNR; + break; + } + mr_addr[0] = out->data; + mr_addr[1] = in->data; + mr_len[0] = out->size; + mr_len[1] = in->size; + args.numr = 2; + args.mr_addr = mr_addr; + args.mr_len = mr_len; + args.cparam1 = cparam1; + if (program->ubpf_jit_fn) { + result = program->ubpf_jit_fn(&args, sizeof(args)); + } else { + uint64_t ubpf_result; + + if (ubpf_exec(program->ubpf_vm, &args, sizeof(args), + &ubpf_result) < 0) { + status = NVME_INVALID_FIELD | NVME_DNR; + break; + } + result = ubpf_result; + } +#else + status = NVME_INVALID_FIELD | NVME_DNR; +#endif + break; + default: + status = NVME_INVALID_FIELD | NVME_DNR; + break; + } + if (!status) { + req->cqe.n.result = result > UINT32_MAX ? 
UINT32_MAX : result; + } qemu_mutex_unlock(&csd->lock); + if (status) { + return status; + } + if (runtime) { req->reqlat += runtime; req->expire_time += runtime; diff --git a/hw/femu/csd/csd.h b/hw/femu/csd/csd.h index 01294bd3c7b..46d127057b2 100644 --- a/hw/femu/csd/csd.h +++ b/hw/femu/csd/csd.h @@ -96,10 +96,20 @@ typedef struct QEMU_PACKED NvmeCsdExecCmd { uint32_t in_afdm_id; uint32_t out_afdm_id; uint32_t group; - uint32_t rsvd14; + uint32_t cparam1; uint32_t runtime; } NvmeCsdExecCmd; +typedef struct FemuCsdArgs { + int numr; + void **mr_addr; + long long *mr_len; + long long cparam1; + long long cparam2; + void *data_buffer; + long long buffer_len; +} QEMU_PACKED FemuCsdArgs; + typedef struct QEMU_PACKED NvmeCsdReadAfdmCmd { uint8_t opcode; uint8_t flags; diff --git a/hw/femu/meson.build b/hw/femu/meson.build index 5f0c7dc2ac1..4a3af5f8913 100644 --- a/hw/femu/meson.build +++ b/hw/femu/meson.build @@ -1 +1,10 @@ -system_ss.add(when: 'CONFIG_FEMU_PCI', if_true: files('dma.c', 'intr.c', 'nvme-util.c', 'nvme-admin.c', 'nvme-io.c', 'femu.c', 'nossd/nop.c', 'nand/nand.c', 'timing-model/timing.c', 'ocssd/oc12.c', 'ocssd/oc20.c', 'zns/zns.c', 'zns/zftl.c','bbssd/bb.c', 'bbssd/ftl.c', 'csd/csd.c', 'lib/pqueue.c', 'lib/rte_ring.c', 'backend/dram.c')) +system_ss.add(when: 'CONFIG_FEMU_PCI', + if_true: files('dma.c', 'intr.c', 'nvme-util.c', + 'nvme-admin.c', 'nvme-io.c', 'femu.c', + 'nossd/nop.c', 'nand/nand.c', + 'timing-model/timing.c', 'ocssd/oc12.c', + 'ocssd/oc20.c', 'zns/zns.c', 'zns/zftl.c', + 'bbssd/bb.c', 'bbssd/ftl.c', 'csd/csd.c', + 'lib/pqueue.c', 'lib/rte_ring.c', + 'backend/dram.c')) +system_ss.add(when: 'CONFIG_FEMU_CSD_UBPF', if_true: femu_csd_ubpf) diff --git a/meson.build b/meson.build index 50c774a1955..50d4efe6df9 100644 --- a/meson.build +++ b/meson.build @@ -2292,6 +2292,8 @@ endif # libbpf bpf_version = '1.1.0' libbpf = dependency('libbpf', version: '>=' + bpf_version, required: get_option('bpf'), method: 'pkg-config') +femu_csd_ubpf = 
dependency('ubpf', required: get_option('femu_csd_ubpf'), + method: 'pkg-config') if libbpf.found() and not cc.links(''' #include #include @@ -2529,6 +2531,7 @@ config_host_data.set('CONFIG_HEXAGON_IDEF_PARSER', get_option('hexagon_idef_pars config_host_data.set('CONFIG_LIBATTR', have_old_libattr) config_host_data.set('CONFIG_LIBCAP_NG', libcap_ng.found()) config_host_data.set('CONFIG_EBPF', libbpf.found()) +config_host_data.set('CONFIG_FEMU_CSD_UBPF', femu_csd_ubpf.found()) config_host_data.set('CONFIG_AF_XDP', libxdp.found()) config_host_data.set('CONFIG_LIBDAXCTL', libdaxctl.found()) config_host_data.set('CONFIG_LIBISCSI', libiscsi.found()) diff --git a/meson_options.txt b/meson_options.txt index fff1521e580..9d66b8e9d8c 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -146,6 +146,8 @@ option('blkio', type : 'feature', value : 'auto', description: 'libblkio block device driver') option('bpf', type : 'feature', value : 'auto', description: 'eBPF support') +option('femu_csd_ubpf', type : 'feature', value : 'disabled', + description: 'uBPF runtime support for FEMU CSD') option('cocoa', type : 'feature', value : 'auto', description: 'Cocoa user interface (macOS only)') option('curl', type : 'feature', value : 'auto', diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 0ebe6bc52a6..f24746909a4 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -116,6 +116,7 @@ meson_options_help() { printf "%s\n" ' dmg dmg image format support' printf "%s\n" ' docs Documentations build support' printf "%s\n" ' dsound DirectSound sound support' + printf "%s\n" ' femu-csd-ubpf uBPF runtime support for FEMU CSD' printf "%s\n" ' fuse FUSE block device export' printf "%s\n" ' fuse-lseek SEEK_HOLE/SEEK_DATA support for FUSE exports' printf "%s\n" ' gcrypt libgcrypt cryptography support' @@ -315,6 +316,8 @@ _meson_option_parse() { --enable-fdt) printf "%s" -Dfdt=enabled ;; --disable-fdt) printf "%s" -Dfdt=disabled ;; 
--enable-fdt=*) quote_sh "-Dfdt=$2" ;; + --enable-femu-csd-ubpf) printf "%s" -Dfemu_csd_ubpf=enabled ;; + --disable-femu-csd-ubpf) printf "%s" -Dfemu_csd_ubpf=disabled ;; --enable-fuse) printf "%s" -Dfuse=enabled ;; --disable-fuse) printf "%s" -Dfuse=disabled ;; --enable-fuse-lseek) printf "%s" -Dfuse_lseek=enabled ;; diff --git a/tests/femu-csd/Makefile b/tests/femu-csd/Makefile index 0d79875336d..48e0a532c6e 100644 --- a/tests/femu-csd/Makefile +++ b/tests/femu-csd/Makefile @@ -1,7 +1,8 @@ CC ?= gcc CFLAGS ?= -Wall -Wextra -O2 -g +SO_CFLAGS ?= -Wall -Wextra -O2 -g -fPIC -TARGETS := csd-passthru +TARGETS := csd-passthru csd-vadd.so .PHONY: all clean @@ -10,5 +11,8 @@ all: $(TARGETS) csd-passthru: csd-passthru.c $(CC) $(CFLAGS) -o $@ $< +csd-vadd.so: csd-vadd.c femu-csd-kernel.h + $(CC) $(SO_CFLAGS) -shared -o $@ $< + clean: rm -f $(TARGETS) diff --git a/tests/femu-csd/README.md b/tests/femu-csd/README.md index 47a0a47f0f3..12f58302990 100644 --- a/tests/femu-csd/README.md +++ b/tests/femu-csd/README.md @@ -24,14 +24,33 @@ The smoke test sends these CSD commands through `NVME_IOCTL_IO_CMD`: - execute the phantom CSF - deallocate AFDM +Build also produces `csd-vadd.so`, a minimal shared-library CSF used by the +shared-library smoke path. Because the shared library is loaded by the QEMU +process on the host, pass a host-visible path to the guest tool: + +```bash +sudo ./csd-passthru /dev/nvme0n1 smoke-so /home//FEMU/tests/femu-csd/csd-vadd.so +``` + +The shared-library CSF ABI is: + +```c +int64_t kernel(struct femu_csd_args *args); +``` + +For the direct AFDM execution path, `args->mr_addr[0]` is the output AFDM and +`args->mr_addr[1]` is the input AFDM. 
+ Other useful command-level checks: ```bash sudo ./csd-passthru /dev/nvme0n1 alloc 4096 sudo ./csd-passthru /dev/nvme0n1 download-phantom 1000 +sudo ./csd-passthru /dev/nvme0n1 download-so /host/path/csd-vadd.so csd_vadd +sudo ./csd-passthru /dev/nvme0n1 download-ubpf /host/path/csf.bpf.o csf_symbol 0 sudo ./csd-passthru /dev/nvme0n1 create-group 5 0 0 sudo ./csd-passthru /dev/nvme0n1 set-qos 6 0 0 -sudo ./csd-passthru /dev/nvme0n1 exec 0 +sudo ./csd-passthru /dev/nvme0n1 exec 0 sudo ./csd-passthru /dev/nvme0n1 delete-group sudo ./csd-passthru /dev/nvme0n1 nvm-to-afdm 0 0 0 ``` @@ -43,3 +62,10 @@ The tool assumes FEMU was started with CSD mode enabled, for example: ``` It intentionally does not depend on CEMU's modified kernel driver or FDMFS. + +Shared-library CSF support is enabled in the default FEMU build. uBPF support +is optional because it depends on an external `ubpf` library. Build FEMU with: + +```bash +./femu-compile.sh --enable-csd-ubpf +``` diff --git a/tests/femu-csd/csd-passthru.c b/tests/femu-csd/csd-passthru.c index 50761955678..d2a6593c3ea 100644 --- a/tests/femu-csd/csd-passthru.c +++ b/tests/femu-csd/csd-passthru.c @@ -27,6 +27,8 @@ enum { enum { CSD_CSF_TYPE_PHANTOM = 0, + CSD_CSF_TYPE_EBPF = 1, + CSD_CSF_TYPE_SHARED_LIB = 3, }; static void usage(const char *prog) @@ -37,14 +39,18 @@ static void usage(const char *prog) " %s /dev/nvmeXnY alloc \n" " %s /dev/nvmeXnY dealloc \n" " %s /dev/nvmeXnY download-phantom \n" - " %s /dev/nvmeXnY exec [runtime-ns] [group-id]\n" + " %s /dev/nvmeXnY download-so [runtime-ns]\n" + " %s /dev/nvmeXnY download-ubpf [jit:0|1] [runtime-ns]\n" + " %s /dev/nvmeXnY exec [runtime-ns] [group-id] [cparam1]\n" + " %s /dev/nvmeXnY smoke-so \n" " %s /dev/nvmeXnY create-group \n" " %s /dev/nvmeXnY set-qos \n" " %s /dev/nvmeXnY delete-group \n" " %s /dev/nvmeXnY write \n" " %s /dev/nvmeXnY read \n" " %s /dev/nvmeXnY nvm-to-afdm \n", - prog, prog, prog, prog, prog, prog, prog, prog, prog, prog, prog); + prog, prog, prog, prog, 
prog, prog, prog, prog, prog, prog, prog, + prog, prog, prog); } static uint64_t parse_u64(const char *s, const char *name) @@ -90,8 +96,46 @@ static uint32_t csd_download_phantom(int fd, uint32_t runtime) return cmd.result; } +static uint32_t csd_download_program(int fd, uint8_t type, const char *path, + const char *symbol, uint8_t flags, + uint32_t runtime) +{ + size_t path_len = strlen(path); + size_t symbol_len = strlen(symbol); + size_t size = path_len + symbol_len + 2; + void *buf = NULL; + struct nvme_passthru_cmd cmd = { + .opcode = CSD_CMD_DOWNLOAD, + .nsid = 1, + .data_len = size, + .cdw10 = (uint32_t)size, + .cdw11 = (uint32_t)(size >> 32), + .cdw12 = type | ((uint32_t)flags << 8), + .cdw13 = runtime, + }; + + if (posix_memalign(&buf, 4096, (size + 4095) & ~4095ULL)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + + memset(buf, 0, (size + 4095) & ~4095ULL); + memcpy(buf, path, path_len); + memcpy((char *)buf + path_len + 1, symbol, symbol_len); + cmd.addr = (uintptr_t)buf; + + if (submit(fd, &cmd)) { + free(buf); + exit(EXIT_FAILURE); + } + + free(buf); + return cmd.result; +} + static void csd_exec(int fd, uint32_t csf_id, uint32_t in_afdm_id, - uint32_t out_afdm_id, uint32_t runtime, uint32_t group_id) + uint32_t out_afdm_id, uint32_t runtime, + uint32_t group_id, uint32_t cparam1) { struct nvme_passthru_cmd cmd = { .opcode = CSD_CMD_EXEC, @@ -100,7 +144,7 @@ static void csd_exec(int fd, uint32_t csf_id, uint32_t in_afdm_id, .cdw11 = in_afdm_id, .cdw12 = out_afdm_id, .cdw13 = group_id, - .cdw14 = 0, + .cdw14 = cparam1, .cdw15 = runtime, }; @@ -288,7 +332,7 @@ static void run_smoke(int fd) csf_id = csd_download_phantom(fd, 1000); printf("downloaded phantom CSF id=%" PRIu32 "\n", csf_id); - csd_exec(fd, csf_id, id, id, 0, 0); + csd_exec(fd, csf_id, id, id, 0, 0, 0); printf("phantom exec passed\n"); csd_dealloc(fd, id); @@ -298,6 +342,56 @@ static void run_smoke(int fd) free(read_buf); } +static void run_so_smoke(int fd, const char *so_path) +{ 
+ enum { COUNT = 1024 }; + int *input = NULL; + int *output = NULL; + uint32_t in_id; + uint32_t out_id; + uint32_t csf_id; + + if (posix_memalign((void **)&input, 4096, 8192) || + posix_memalign((void **)&output, 4096, 4096)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < COUNT; i++) { + input[i * 2] = i; + input[i * 2 + 1] = i * 2; + output[i] = 0; + } + + in_id = csd_alloc(fd, 8192); + out_id = csd_alloc(fd, 4096); + csd_write(fd, in_id, 0, input, 8192); + csd_write(fd, out_id, 0, output, 4096); + + csf_id = csd_download_program(fd, CSD_CSF_TYPE_SHARED_LIB, so_path, + "csd_vadd", 0, 0); + printf("downloaded shared-library CSF id=%" PRIu32 "\n", csf_id); + csd_exec(fd, csf_id, in_id, out_id, 0, 0, COUNT); + csd_read(fd, out_id, 0, output, 4096); + + for (int i = 0; i < COUNT; i++) { + int expected = i + i * 2; + + if (output[i] != expected) { + fprintf(stderr, "shared-library smoke mismatch at %d: got %d expected %d\n", + i, output[i], expected); + exit(EXIT_FAILURE); + } + } + + csd_dealloc(fd, in_id); + csd_dealloc(fd, out_id); + printf("shared-library smoke passed\n"); + + free(input); + free(output); +} + int main(int argc, char **argv) { const char *dev; @@ -319,6 +413,12 @@ int main(int argc, char **argv) if (!strcmp(op, "smoke")) { run_smoke(fd); + } else if (!strcmp(op, "smoke-so")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + run_so_smoke(fd, argv[3]); } else if (!strcmp(op, "alloc")) { uint64_t size; uint32_t id; @@ -345,25 +445,60 @@ int main(int argc, char **argv) } id = csd_download_phantom(fd, (uint32_t)parse_u64(argv[3], "runtime-ns")); printf("%" PRIu32 "\n", id); - } else if (!strcmp(op, "exec")) { + } else if (!strcmp(op, "download-so")) { + uint32_t runtime = 0; + uint32_t id; + + if (argc < 5 || argc > 6) { + usage(argv[0]); + return EXIT_FAILURE; + } + if (argc == 6) { + runtime = (uint32_t)parse_u64(argv[5], "runtime-ns"); + } + id = csd_download_program(fd, CSD_CSF_TYPE_SHARED_LIB, 
argv[3], + argv[4], 0, runtime); + printf("%" PRIu32 "\n", id); + } else if (!strcmp(op, "download-ubpf")) { uint32_t runtime = 0; + uint8_t jit = 0; + uint32_t id; + if (argc < 5 || argc > 7) { + usage(argv[0]); + return EXIT_FAILURE; + } + if (argc >= 6) { + jit = (uint8_t)parse_u64(argv[5], "jit"); + } + if (argc == 7) { + runtime = (uint32_t)parse_u64(argv[6], "runtime-ns"); + } + id = csd_download_program(fd, CSD_CSF_TYPE_EBPF, argv[3], argv[4], + jit ? 1 : 0, runtime); + printf("%" PRIu32 "\n", id); + } else if (!strcmp(op, "exec")) { + uint32_t runtime = 0; uint32_t group_id = 0; + uint32_t cparam1 = 0; - if (argc < 6 || argc > 8) { + if (argc < 6 || argc > 9) { usage(argv[0]); return EXIT_FAILURE; } if (argc >= 7) { runtime = (uint32_t)parse_u64(argv[6], "runtime-ns"); } - if (argc == 8) { + if (argc >= 8) { group_id = (uint32_t)parse_u64(argv[7], "group-id"); } + if (argc == 9) { + cparam1 = (uint32_t)parse_u64(argv[8], "cparam1"); + } csd_exec(fd, (uint32_t)parse_u64(argv[3], "csf-id"), (uint32_t)parse_u64(argv[4], "in-afdm-id"), (uint32_t)parse_u64(argv[5], "out-afdm-id"), - runtime, group_id); + runtime, group_id, cparam1); } else if (!strcmp(op, "create-group")) { uint32_t id; diff --git a/tests/femu-csd/csd-vadd.c b/tests/femu-csd/csd-vadd.c new file mode 100644 index 00000000000..0bcc2a63287 --- /dev/null +++ b/tests/femu-csd/csd-vadd.c @@ -0,0 +1,19 @@ +#include <stdint.h> +#include "femu-csd-kernel.h" + +int64_t csd_vadd(struct femu_csd_args *args) +{ + int *out = args->mr_addr[0]; + int *in = args->mr_addr[1]; + long long count = args->cparam1; + + if (args->numr < 2 || count < 0) { + return -1; + } + + for (long long i = 0; i < count; i++) { + out[i] = in[i * 2] + in[i * 2 + 1]; + } + + return count; +} diff --git a/tests/femu-csd/femu-csd-kernel.h b/tests/femu-csd/femu-csd-kernel.h new file mode 100644 index 00000000000..9b50ca89e0b --- /dev/null +++ b/tests/femu-csd/femu-csd-kernel.h @@ -0,0 +1,14 @@ +#ifndef FEMU_CSD_KERNEL_H +#define FEMU_CSD_KERNEL_H +
+struct femu_csd_args { + int numr; + void **mr_addr; + long long *mr_len; + long long cparam1; + long long cparam2; + void *data_buffer; + long long buffer_len; +} __attribute__((packed)); + +#endif From 0d9b3f40e0e8b210394a9759170ca5eca822d266 Mon Sep 17 00:00:00 2001 From: Emilio597 <857005703@qq.com> Date: Tue, 26 May 2026 00:00:43 +0800 Subject: [PATCH 04/10] hw/femu: route CSD I/O through SSD FTL --- README.md | 17 +++++++++-------- hw/femu/csd/csd.c | 7 +++++++ hw/femu/nvme-io.c | 2 +- tests/femu-csd/README.md | 11 ++++++++--- tests/femu-csd/csd-passthru.c | 6 +++--- 5 files changed, 28 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 375a54e0aa3..e471ea8d9c4 100644 --- a/README.md +++ b/README.md @@ -63,12 +63,12 @@ FEMU bridges the gap between SSD hardware platforms and SSD simulators by provid ## Features -| Feature | BlackBox | WhiteBox | ZNS | NoSSD | -|---------|----------|----------|-----|--------| -| **FTL Management** | Device-side | Host-side | Zone-based | None | -| **Use Cases** | Commercial SSD simulation | OpenChannel SSD research | ZNS research | SCM emulation | -| **Latency Model** | Realistic NAND | Realistic NAND | Zone-optimized | Ultra-low (sub-10μs) | -| **Guest Support** | Full NVMe | OpenChannel 1.2/2.0 | NVMe ZNS | NVMe basic | +| Feature | BlackBox | WhiteBox | ZNS | NoSSD | CSD | +|---------|----------|----------|-----|--------|-----| +| **FTL Management** | Device-side | Host-side | Zone-based | None | Device-side | +| **Use Cases** | Commercial SSD simulation | OpenChannel SSD research | ZNS research | SCM emulation | Computational storage research | +| **Latency Model** | Realistic NAND | Realistic NAND | Zone-optimized | Ultra-low (sub-10μs) | Realistic NAND + compute runtime | +| **Guest Support** | Full NVMe | OpenChannel 1.2/2.0 | NVMe ZNS | NVMe basic | Full NVMe + CSD commands | --- @@ -388,10 +388,11 @@ csf_runtime_scale=3 # Runtime scaling factor ``` **Current Scope:** -- Normal NVMe read/write 
compatibility in CSD mode +- Normal NVMe read/write through the device-side BBSSD FTL path in CSD mode - Vendor commands for AFDM allocation, read/write, NVM-to-AFDM copy - Minimal phantom CSF download/execute path -- Shared-library CSF download/execute path using a host-visible `.so` +- Shared-library CSF download/execute path using the original CEMU + `path\0symbol\0` program descriptor format - Optional uBPF CSF support via `./femu-compile.sh --enable-csd-ubpf` - Group/QoS command metadata - Guest-side passthrough tests in `tests/femu-csd/` diff --git a/hw/femu/csd/csd.c b/hw/femu/csd/csd.c index 5e915993fbe..3f4e131f813 100644 --- a/hw/femu/csd/csd.c +++ b/hw/femu/csd/csd.c @@ -3,6 +3,7 @@ #include #include "csd.h" +#include "../bbssd/ftl.h" #ifdef CONFIG_FEMU_CSD_UBPF #include @@ -125,6 +126,7 @@ static void csd_init_ctrl_str(FemuCtrl *n) static void csd_init(FemuCtrl *n, Error **errp) { FemuCsdState *csd; + struct ssd *ssd; csd_check_size(); @@ -155,6 +157,11 @@ static void csd_init(FemuCtrl *n, Error **errp) csd_init_ctrl_str(n); + ssd = n->ssd = g_malloc0(sizeof(*ssd)); + ssd->dataplane_started_ptr = &n->dataplane_started; + ssd->ssdname = (char *)n->devname; + ssd_init(n); + csd = g_new0(FemuCsdState, 1); csd->params = n->csd_params; csd->fdm_capacity = n->csd_params.fdm_size_mb * MiB; diff --git a/hw/femu/nvme-io.c b/hw/femu/nvme-io.c index c1cabb09dbb..5d0317c16c1 100644 --- a/hw/femu/nvme-io.c +++ b/hw/femu/nvme-io.c @@ -145,7 +145,7 @@ static void nvme_process_cq_cpl(void *arg, int index_poller) int rc; int i; - if (BBSSD(n) || ZNSSD(n)) { + if (BBSSD(n) || ZNSSD(n) || CSD(n)) { rp = n->to_poller[index_poller]; } diff --git a/tests/femu-csd/README.md b/tests/femu-csd/README.md index 12f58302990..d762b89af1b 100644 --- a/tests/femu-csd/README.md +++ b/tests/femu-csd/README.md @@ -25,8 +25,10 @@ The smoke test sends these CSD commands through `NVME_IOCTL_IO_CMD`: - deallocate AFDM Build also produces `csd-vadd.so`, a minimal shared-library CSF used by 
the -shared-library smoke path. Because the shared library is loaded by the QEMU -process on the host, pass a host-visible path to the guest tool: +shared-library smoke path. The program download payload follows the original +CEMU descriptor format: a PRP data buffer containing `path\0symbol\0`. Because +the shared library is loaded by the QEMU process on the host, the `path` string +inside that descriptor must be visible to the host QEMU process: ```bash sudo ./csd-passthru /dev/nvme0n1 smoke-so /home//FEMU/tests/femu-csd/csd-vadd.so @@ -61,7 +63,10 @@ The tool assumes FEMU was started with CSD mode enabled, for example: -device femu,femu_mode=4,fdm_size=64 ``` -It intentionally does not depend on CEMU's modified kernel driver or FDMFS. +It intentionally does not depend on CEMU's modified kernel driver or FDMFS. CSD +mode still uses FEMU's device-side BBSSD FTL path for normal NVM read/write +requests; the passthrough commands validate the additional computational +storage interface. Shared-library CSF support is enabled in the default FEMU build. uBPF support is optional because it depends on an external `ubpf` library. 
Build FEMU with: diff --git a/tests/femu-csd/csd-passthru.c b/tests/femu-csd/csd-passthru.c index d2a6593c3ea..857c481056f 100644 --- a/tests/femu-csd/csd-passthru.c +++ b/tests/femu-csd/csd-passthru.c @@ -39,10 +39,10 @@ static void usage(const char *prog) " %s /dev/nvmeXnY alloc \n" " %s /dev/nvmeXnY dealloc \n" " %s /dev/nvmeXnY download-phantom \n" - " %s /dev/nvmeXnY download-so [runtime-ns]\n" - " %s /dev/nvmeXnY download-ubpf [jit:0|1] [runtime-ns]\n" + " %s /dev/nvmeXnY download-so [runtime-ns]\n" + " %s /dev/nvmeXnY download-ubpf [jit:0|1] [runtime-ns]\n" " %s /dev/nvmeXnY exec [runtime-ns] [group-id] [cparam1]\n" - " %s /dev/nvmeXnY smoke-so \n" + " %s /dev/nvmeXnY smoke-so \n" " %s /dev/nvmeXnY create-group \n" " %s /dev/nvmeXnY set-qos \n" " %s /dev/nvmeXnY delete-group \n" From 2bfc150d5c2ae58c8d2787ee2f3fe2d7bd9091c8 Mon Sep 17 00:00:00 2001 From: Emilio597 <857005703@qq.com> Date: Tue, 26 May 2026 00:06:16 +0800 Subject: [PATCH 05/10] hw/femu: add CSD program lifecycle commands --- README.md | 1 + hw/femu/csd/csd.c | 172 +++++++++++++++++++++++++++++++--- hw/femu/csd/csd.h | 44 +++++++++ tests/femu-csd/README.md | 12 +++ tests/femu-csd/csd-passthru.c | 125 +++++++++++++++++++++++- 5 files changed, 339 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index e471ea8d9c4..9366e81d260 100644 --- a/README.md +++ b/README.md @@ -393,6 +393,7 @@ csf_runtime_scale=3 # Runtime scaling factor - Minimal phantom CSF download/execute path - Shared-library CSF download/execute path using the original CEMU `path\0symbol\0` program descriptor format +- CEMU-style admin commands for CSF load/unload and activate/deactivate - Optional uBPF CSF support via `./femu-compile.sh --enable-csd-ubpf` - Group/QoS command metadata - Guest-side passthrough tests in `tests/femu-csd/` diff --git a/hw/femu/csd/csd.c b/hw/femu/csd/csd.c index 3f4e131f813..b0dc3e44c8b 100644 --- a/hw/femu/csd/csd.c +++ b/hw/femu/csd/csd.c @@ -20,9 +20,14 @@ typedef struct FemuCsdAfdm { 
typedef struct FemuCsdProgram { uint32_t id; uint8_t type; + bool active; + bool indirect; + bool loading; uint32_t runtime; uint16_t runtime_scale; uint64_t size; + uint64_t load_size; + uint64_t pid; uint8_t *data; GModule *module; FemuCsdSharedLibFn shared_lib_fn; @@ -65,6 +70,8 @@ static void csd_check_size(void) QEMU_BUILD_BUG_ON(sizeof(NvmeCsdCreateGroupCmd) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeCsdSetQosCmd) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeCsdDeleteGroupCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdLoadProgramCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdProgramActivationCmd) != 64); } static FemuCsdState *csd_state(FemuCtrl *n) @@ -354,6 +361,138 @@ static uint16_t csd_load_ubpf(FemuCsdProgram *program, bool jit) #endif } +static uint16_t csd_load_program_data(FemuCsdProgram *program, bool jit) +{ + switch (program->type) { + case NVME_CSD_CSF_TYPE_PHANTOM: + return NVME_SUCCESS; + case NVME_CSD_CSF_TYPE_SHARED_LIB: + return csd_load_shared_lib(program); + case NVME_CSD_CSF_TYPE_EBPF: + return csd_load_ubpf(program, jit); + default: + return NVME_INVALID_FIELD | NVME_DNR; + } +} + +static uint16_t csd_compute_load(FemuCtrl *n, NvmeCmd *cmd) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdLoadProgramCmd *load = (NvmeCsdLoadProgramCmd *)cmd; + uint16_t pind = le16_to_cpu(load->pind); + uint32_t psize = le32_to_cpu(load->psize); + uint32_t numb = le32_to_cpu(load->numb); + uint32_t loff = le32_to_cpu(load->loff); + uint64_t pid = le64_to_cpu(load->pid); + uint64_t prp1 = le64_to_cpu(load->prp1); + uint64_t prp2 = le64_to_cpu(load->prp2); + FemuCsdProgram *program; + uint16_t status = NVME_SUCCESS; + + if (pind == 0 || psize > UINT32_MAX || loff > psize || + numb > psize - loff) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + qemu_mutex_lock(&csd->lock); + + if (load->sel) { + program = csd_get_program_locked(csd, pind); + if (!program) { + qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } + if (program->active) { + 
qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } + g_hash_table_remove(csd->programs, GUINT_TO_POINTER((uint32_t)pind)); + qemu_mutex_unlock(&csd->lock); + return NVME_SUCCESS; + } + + if (loff == 0) { + program = g_new0(FemuCsdProgram, 1); + program->id = pind; + program->type = load->ptype; + program->runtime = le32_to_cpu(load->runtime); + program->runtime_scale = le16_to_cpu(load->runtime_scale); + program->size = psize; + program->pid = pid; + program->indirect = load->indirect; + program->loading = true; + if (psize) { + program->data = g_malloc0(psize); + } + g_hash_table_replace(csd->programs, GUINT_TO_POINTER((uint32_t)pind), + program); + } else { + program = csd_get_program_locked(csd, pind); + if (!program || program->size != psize || + (load->pit == 1 && program->pid != pid) || + program->type != load->ptype) { + qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } + } + + if (numb) { + status = dma_write_prp(n, program->data + loff, numb, prp1, prp2); + if (status) { + qemu_mutex_unlock(&csd->lock); + return status | NVME_DNR; + } + program->load_size += numb; + } + + if (program->load_size == program->size) { + status = csd_load_program_data(program, load->jit); + if (!status) { + program->loading = false; + program->active = false; + } + } + + qemu_mutex_unlock(&csd->lock); + return status; +} + +static uint16_t csd_compute_activate(FemuCtrl *n, NvmeCmd *cmd) +{ + FemuCsdState *csd = csd_state(n); + NvmeCsdProgramActivationCmd *activation = + (NvmeCsdProgramActivationCmd *)cmd; + uint16_t pind = activation->pind; + uint8_t sel = activation->sel; + FemuCsdProgram *program; + + if (pind == 0) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + qemu_mutex_lock(&csd->lock); + program = csd_get_program_locked(csd, pind); + if (!program || program->loading) { + qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } + + switch (sel) { + case 0: + program->active = false; + break; + case 1: + 
program->active = true; + break; + default: + qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } + + qemu_mutex_unlock(&csd->lock); + return NVME_SUCCESS; +} + static uint16_t csd_download(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { FemuCsdState *csd = csd_state(n); @@ -383,6 +522,7 @@ static uint16_t csd_download(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) program->runtime = le32_to_cpu(download->runtime); program->runtime_scale = le16_to_cpu(download->runtime_scale); program->size = size; + program->active = true; if (size) { program->data = g_malloc0(size); @@ -393,19 +533,7 @@ static uint16_t csd_download(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) } } - switch (program->type) { - case NVME_CSD_CSF_TYPE_PHANTOM: - break; - case NVME_CSD_CSF_TYPE_SHARED_LIB: - status = csd_load_shared_lib(program); - break; - case NVME_CSD_CSF_TYPE_EBPF: - status = csd_load_ubpf(program, download->csf_flags & 0x1); - break; - default: - status = NVME_INVALID_FIELD | NVME_DNR; - break; - } + status = csd_load_program_data(program, download->csf_flags & 0x1); if (status) { csd_program_free(program); return status; @@ -451,6 +579,10 @@ static uint16_t csd_exec(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) qemu_mutex_unlock(&csd->lock); return NVME_INVALID_FIELD | NVME_DNR; } + if (!program->active) { + qemu_mutex_unlock(&csd->lock); + return NVME_INVALID_FIELD | NVME_DNR; + } if (group_id != 0 && !csd_get_group_locked(csd, group_id)) { qemu_mutex_unlock(&csd->lock); @@ -821,6 +953,18 @@ static uint16_t csd_io_cmd(FemuCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, } } +static uint16_t csd_admin_cmd(FemuCtrl *n, NvmeCmd *cmd) +{ + switch (cmd->opcode) { + case NVME_ADM_CMD_CSD_COMPUTE_LOAD: + return csd_compute_load(n, cmd); + case NVME_ADM_CMD_CSD_COMPUTE_ACTIVATE: + return csd_compute_activate(n, cmd); + default: + return NVME_INVALID_OPCODE | NVME_DNR; + } +} + int nvme_register_csd(FemuCtrl *n) { n->ext_ops = (FemuExtCtrlOps) { @@ -829,7 +973,7 @@ int 
nvme_register_csd(FemuCtrl *n) .exit = csd_exit, .rw_check_req = NULL, .start_ctrl = NULL, - .admin_cmd = NULL, + .admin_cmd = csd_admin_cmd, .io_cmd = csd_io_cmd, .get_log = NULL, }; diff --git a/hw/femu/csd/csd.h b/hw/femu/csd/csd.h index 46d127057b2..ae69042d216 100644 --- a/hw/femu/csd/csd.h +++ b/hw/femu/csd/csd.h @@ -16,6 +16,11 @@ enum FemuCsdIoCommands { NVME_CMD_CSD_DELETE_GROUP = 0xf8, }; +enum FemuCsdAdminCommands { + NVME_ADM_CMD_CSD_COMPUTE_LOAD = 0x22, + NVME_ADM_CMD_CSD_COMPUTE_ACTIVATE = 0x23, +}; + enum FemuCsdFdmType { NVME_CSD_FDM_TYPE_HOST = 0, }; @@ -43,6 +48,45 @@ typedef struct QEMU_PACKED NvmeCsdDownloadCmd { uint32_t rsvd15[2]; } NvmeCsdDownloadCmd; +typedef struct QEMU_PACKED NvmeCsdLoadProgramCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint16_t jit:1; + uint16_t rsvd_ctrl:15; + uint16_t runtime_scale; + uint32_t runtime; + uint32_t rsvd4[2]; + uint64_t prp1; + uint64_t prp2; + uint16_t pind; + uint8_t ptype; + uint8_t sel:1; + uint8_t pit:3; + uint8_t indirect:1; + uint8_t rsvd10:3; + uint32_t psize; + uint64_t pid; + uint32_t numb; + uint32_t loff; +} NvmeCsdLoadProgramCmd; + +typedef struct QEMU_PACKED NvmeCsdProgramActivationCmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint32_t rsvd[4]; + uint64_t prp1; + uint64_t prp2; + uint32_t pind:16; + uint32_t sel:4; + uint32_t rsvd10:12; + uint32_t runtime; + uint32_t rsvd12[4]; +} NvmeCsdProgramActivationCmd; + typedef struct QEMU_PACKED NvmeCsdAllocFdmCmd { uint8_t opcode; uint8_t flags; diff --git a/tests/femu-csd/README.md b/tests/femu-csd/README.md index d762b89af1b..f280d0c0996 100644 --- a/tests/femu-csd/README.md +++ b/tests/femu-csd/README.md @@ -57,6 +57,18 @@ sudo ./csd-passthru /dev/nvme0n1 delete-group sudo ./csd-passthru /dev/nvme0n1 nvm-to-afdm 0 0 0 ``` +FEMU CSD also accepts the original CEMU program lifecycle admin command +layouts for load/unload (`0x22`) and activate/deactivate (`0x23`). 
The +lightweight passthrough helper can send those commands to the controller device +without the CEMU kernel driver: + +```bash +sudo ./csd-passthru /dev/nvme0 admin-load-so 1 /host/path/csd-vadd.so csd_vadd +sudo ./csd-passthru /dev/nvme0 admin-activate 1 +sudo ./csd-passthru /dev/nvme0 admin-deactivate 1 +sudo ./csd-passthru /dev/nvme0 admin-unload 1 +``` + The tool assumes FEMU was started with CSD mode enabled, for example: ```bash diff --git a/tests/femu-csd/csd-passthru.c b/tests/femu-csd/csd-passthru.c index 857c481056f..680fcca977b 100644 --- a/tests/femu-csd/csd-passthru.c +++ b/tests/femu-csd/csd-passthru.c @@ -13,6 +13,8 @@ #include enum { + CSD_ADM_COMPUTE_LOAD = 0x22, + CSD_ADM_COMPUTE_ACTIVATE = 0x23, CSD_CMD_DOWNLOAD = 0xa1, CSD_CMD_ALLOC_FDM = 0xb0, CSD_CMD_DEALLOC_AFDM = 0xc0, @@ -43,6 +45,11 @@ static void usage(const char *prog) " %s /dev/nvmeXnY download-ubpf [jit:0|1] [runtime-ns]\n" " %s /dev/nvmeXnY exec [runtime-ns] [group-id] [cparam1]\n" " %s /dev/nvmeXnY smoke-so \n" + " %s /dev/nvmeX admin-load-so [runtime-ns]\n" + " %s /dev/nvmeX admin-load-phantom \n" + " %s /dev/nvmeX admin-activate \n" + " %s /dev/nvmeX admin-deactivate \n" + " %s /dev/nvmeX admin-unload \n" " %s /dev/nvmeXnY create-group \n" " %s /dev/nvmeXnY set-qos \n" " %s /dev/nvmeXnY delete-group \n" @@ -50,7 +57,7 @@ static void usage(const char *prog) " %s /dev/nvmeXnY read \n" " %s /dev/nvmeXnY nvm-to-afdm \n", prog, prog, prog, prog, prog, prog, prog, prog, prog, prog, prog, - prog, prog, prog); + prog, prog, prog, prog, prog, prog, prog, prog); } static uint64_t parse_u64(const char *s, const char *name) @@ -80,6 +87,83 @@ static int submit(int fd, struct nvme_passthru_cmd *cmd) return ret; } +static int submit_admin(int fd, struct nvme_passthru_cmd *cmd) +{ + int ret = ioctl(fd, NVME_IOCTL_ADMIN_CMD, cmd); + + if (ret < 0) { + perror("NVME_IOCTL_ADMIN_CMD"); + return -1; + } + + return ret; +} + +static void csd_admin_load_program(int fd, uint16_t pind, uint8_t type, + 
const char *path, const char *symbol, + uint8_t flags, uint32_t runtime) +{ + size_t path_len = path ? strlen(path) : 0; + size_t symbol_len = symbol ? strlen(symbol) : 0; + size_t size = path_len + symbol_len + (path ? 2 : 0); + void *buf = NULL; + uint32_t cdw10 = pind | ((uint32_t)type << 16); + struct nvme_passthru_cmd cmd = { + .opcode = CSD_ADM_COMPUTE_LOAD, + .nsid = 1, + .data_len = size, + .cdw2 = ((uint32_t)flags & 0x1), + .cdw3 = runtime, + .cdw10 = cdw10, + .cdw11 = (uint32_t)size, + .cdw14 = (uint32_t)size, + }; + + if (size) { + if (posix_memalign(&buf, 4096, (size + 4095) & ~4095ULL)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + memset(buf, 0, (size + 4095) & ~4095ULL); + memcpy(buf, path, path_len); + memcpy((char *)buf + path_len + 1, symbol, symbol_len); + cmd.addr = (uintptr_t)buf; + } + + if (submit_admin(fd, &cmd)) { + free(buf); + exit(EXIT_FAILURE); + } + + free(buf); +} + +static void csd_admin_unload_program(int fd, uint16_t pind) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_ADM_COMPUTE_LOAD, + .nsid = 1, + .cdw10 = pind | (1U << 24), + }; + + if (submit_admin(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + +static void csd_admin_activation(int fd, uint16_t pind, uint8_t sel) +{ + struct nvme_passthru_cmd cmd = { + .opcode = CSD_ADM_COMPUTE_ACTIVATE, + .nsid = 1, + .cdw10 = pind | ((uint32_t)sel << 16), + }; + + if (submit_admin(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + static uint32_t csd_download_phantom(int fd, uint32_t runtime) { struct nvme_passthru_cmd cmd = { @@ -477,6 +561,45 @@ int main(int argc, char **argv) id = csd_download_program(fd, CSD_CSF_TYPE_EBPF, argv[3], argv[4], jit ? 
1 : 0, runtime); printf("%" PRIu32 "\n", id); + } else if (!strcmp(op, "admin-load-so")) { + uint32_t runtime = 0; + + if (argc < 6 || argc > 7) { + usage(argv[0]); + return EXIT_FAILURE; + } + if (argc == 7) { + runtime = (uint32_t)parse_u64(argv[6], "runtime-ns"); + } + csd_admin_load_program(fd, (uint16_t)parse_u64(argv[3], "pind"), + CSD_CSF_TYPE_SHARED_LIB, argv[4], argv[5], + 0, runtime); + } else if (!strcmp(op, "admin-load-phantom")) { + if (argc != 5) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_admin_load_program(fd, (uint16_t)parse_u64(argv[3], "pind"), + CSD_CSF_TYPE_PHANTOM, NULL, NULL, 0, + (uint32_t)parse_u64(argv[4], "runtime-ns")); + } else if (!strcmp(op, "admin-activate")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_admin_activation(fd, (uint16_t)parse_u64(argv[3], "pind"), 1); + } else if (!strcmp(op, "admin-deactivate")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_admin_activation(fd, (uint16_t)parse_u64(argv[3], "pind"), 0); + } else if (!strcmp(op, "admin-unload")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + csd_admin_unload_program(fd, (uint16_t)parse_u64(argv[3], "pind")); } else if (!strcmp(op, "exec")) { uint32_t runtime = 0; uint32_t group_id = 0; From 0a684d6f836fe6b42ff7a40c516af81bdf5ebf3d Mon Sep 17 00:00:00 2001 From: Emilio597 <857005703@qq.com> Date: Tue, 26 May 2026 09:44:25 +0800 Subject: [PATCH 06/10] hw/femu: use CSD program lifecycle for tests --- README.md | 5 +- hw/femu/csd/csd.c | 65 ------------ hw/femu/csd/csd.h | 17 ---- tests/femu-csd/README.md | 19 ++-- tests/femu-csd/csd-passthru.c | 183 ++++++++++++++-------------------- 5 files changed, 87 insertions(+), 202 deletions(-) diff --git a/README.md b/README.md index 9366e81d260..85035ba6809 100644 --- a/README.md +++ b/README.md @@ -390,9 +390,8 @@ csf_runtime_scale=3 # Runtime scaling factor **Current Scope:** - Normal NVMe read/write through the device-side BBSSD FTL path in CSD 
mode - Vendor commands for AFDM allocation, read/write, NVM-to-AFDM copy -- Minimal phantom CSF download/execute path -- Shared-library CSF download/execute path using the original CEMU - `path\0symbol\0` program descriptor format +- Phantom and shared-library CSF load/execute path using the original CEMU + lifecycle and `path\0symbol\0` program descriptor format - CEMU-style admin commands for CSF load/unload and activate/deactivate - Optional uBPF CSF support via `./femu-compile.sh --enable-csd-ubpf` - Group/QoS command metadata diff --git a/hw/femu/csd/csd.c b/hw/femu/csd/csd.c index b0dc3e44c8b..7cd816de0bb 100644 --- a/hw/femu/csd/csd.c +++ b/hw/femu/csd/csd.c @@ -50,7 +50,6 @@ typedef struct FemuCsdState { uint64_t fdm_capacity; uint64_t fdm_used; uint32_t next_afdm_id; - uint32_t next_csf_id; uint32_t next_group_id; GHashTable *afdms; GHashTable *programs; @@ -60,7 +59,6 @@ typedef struct FemuCsdState { static void csd_check_size(void) { - QEMU_BUILD_BUG_ON(sizeof(NvmeCsdDownloadCmd) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeCsdAllocFdmCmd) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeCsdDeallocAfdmCmd) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeCsdNvmToAfdmCmd) != 64); @@ -173,7 +171,6 @@ static void csd_init(FemuCtrl *n, Error **errp) csd->params = n->csd_params; csd->fdm_capacity = n->csd_params.fdm_size_mb * MiB; csd->next_afdm_id = 1; - csd->next_csf_id = 1; csd->next_group_id = 1; csd->afdms = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, csd_afdm_free); @@ -493,66 +490,6 @@ static uint16_t csd_compute_activate(FemuCtrl *n, NvmeCmd *cmd) return NVME_SUCCESS; } -static uint16_t csd_download(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) -{ - FemuCsdState *csd = csd_state(n); - NvmeCsdDownloadCmd *download = (NvmeCsdDownloadCmd *)cmd; - uint64_t size = le64_to_cpu(download->size); - uint64_t prp1 = le64_to_cpu(download->prp1); - uint64_t prp2 = le64_to_cpu(download->prp2); - FemuCsdProgram *program; - uint32_t id; - uint16_t status = NVME_SUCCESS; - - if (size > 
UINT32_MAX) { - return NVME_INVALID_FIELD | NVME_DNR; - } - - switch (download->csf_type) { - case NVME_CSD_CSF_TYPE_PHANTOM: - case NVME_CSD_CSF_TYPE_EBPF: - case NVME_CSD_CSF_TYPE_SHARED_LIB: - break; - default: - return NVME_INVALID_FIELD | NVME_DNR; - } - - program = g_new0(FemuCsdProgram, 1); - program->type = download->csf_type; - program->runtime = le32_to_cpu(download->runtime); - program->runtime_scale = le16_to_cpu(download->runtime_scale); - program->size = size; - program->active = true; - - if (size) { - program->data = g_malloc0(size); - status = dma_write_prp(n, program->data, size, prp1, prp2); - if (status) { - csd_program_free(program); - return status | NVME_DNR; - } - } - - status = csd_load_program_data(program, download->csf_flags & 0x1); - if (status) { - csd_program_free(program); - return status; - } - - qemu_mutex_lock(&csd->lock); - id = csd->next_csf_id++; - if (id == 0) { - csd->next_csf_id = 1; - id = csd->next_csf_id++; - } - program->id = id; - g_hash_table_insert(csd->programs, GUINT_TO_POINTER(id), program); - qemu_mutex_unlock(&csd->lock); - - req->cqe.n.result = id; - return NVME_SUCCESS; -} - static uint16_t csd_exec(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { FemuCsdState *csd = csd_state(n); @@ -928,8 +865,6 @@ static uint16_t csd_io_cmd(FemuCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, case NVME_CMD_READ: case NVME_CMD_WRITE: return nvme_rw(n, ns, cmd, req); - case NVME_CMD_CSD_DOWNLOAD: - return csd_download(n, cmd, req); case NVME_CMD_CSD_ALLOC_FDM: return csd_alloc_fdm(n, cmd, req); case NVME_CMD_CSD_DEALLOC_AFDM: diff --git a/hw/femu/csd/csd.h b/hw/femu/csd/csd.h index ae69042d216..20d76f4a928 100644 --- a/hw/femu/csd/csd.h +++ b/hw/femu/csd/csd.h @@ -4,7 +4,6 @@ #include "../nvme.h" enum FemuCsdIoCommands { - NVME_CMD_CSD_DOWNLOAD = 0xa1, NVME_CMD_CSD_ALLOC_FDM = 0xb0, NVME_CMD_CSD_DEALLOC_AFDM = 0xc0, NVME_CMD_CSD_NVM_TO_AFDM = 0xd0, @@ -32,22 +31,6 @@ enum FemuCsdCsfType { NVME_CSD_CSF_TYPE_SHARED_LIB = 3, }; -typedef 
struct QEMU_PACKED NvmeCsdDownloadCmd { - uint8_t opcode; - uint8_t flags; - uint16_t cid; - uint32_t nsid; - uint64_t rsvd2[2]; - uint64_t prp1; - uint64_t prp2; - uint64_t size; - uint8_t csf_type; - uint8_t csf_flags; - uint16_t runtime_scale; - uint32_t runtime; - uint32_t rsvd15[2]; -} NvmeCsdDownloadCmd; - typedef struct QEMU_PACKED NvmeCsdLoadProgramCmd { uint8_t opcode; uint8_t flags; diff --git a/tests/femu-csd/README.md b/tests/femu-csd/README.md index f280d0c0996..106a92de60f 100644 --- a/tests/femu-csd/README.md +++ b/tests/femu-csd/README.md @@ -15,19 +15,21 @@ Run a basic AFDM smoke test against a namespace device: sudo ./csd-passthru /dev/nvme0n1 smoke ``` -The smoke test sends these CSD commands through `NVME_IOCTL_IO_CMD`: +The smoke test sends AFDM commands through `NVME_IOCTL_IO_CMD` and uses the +original CEMU-style admin lifecycle commands through `NVME_IOCTL_ADMIN_CMD`: - allocate AFDM - write AFDM - read AFDM -- download a phantom CSF +- load and activate a phantom CSF - execute the phantom CSF +- deactivate and unload the phantom CSF - deallocate AFDM Build also produces `csd-vadd.so`, a minimal shared-library CSF used by the -shared-library smoke path. The program download payload follows the original -CEMU descriptor format: a PRP data buffer containing `path\0symbol\0`. Because -the shared library is loaded by the QEMU process on the host, the `path` string +shared-library smoke path. The program load payload follows the original CEMU +descriptor format: a PRP data buffer containing `path\0symbol\0`. 
Because the +shared library is loaded by the QEMU process on the host, the `path` string inside that descriptor must be visible to the host QEMU process: ```bash @@ -47,9 +49,6 @@ Other useful command-level checks: ```bash sudo ./csd-passthru /dev/nvme0n1 alloc 4096 -sudo ./csd-passthru /dev/nvme0n1 download-phantom 1000 -sudo ./csd-passthru /dev/nvme0n1 download-so /host/path/csd-vadd.so csd_vadd -sudo ./csd-passthru /dev/nvme0n1 download-ubpf /host/path/csf.bpf.o csf_symbol 0 sudo ./csd-passthru /dev/nvme0n1 create-group 5 0 0 sudo ./csd-passthru /dev/nvme0n1 set-qos 6 0 0 sudo ./csd-passthru /dev/nvme0n1 exec 0 @@ -59,11 +58,13 @@ sudo ./csd-passthru /dev/nvme0n1 nvm-to-afdm 0 0 0 FEMU CSD also accepts the original CEMU program lifecycle admin command layouts for load/unload (`0x22`) and activate/deactivate (`0x23`). The -lightweight passthrough helper can send those commands to the controller device +lightweight passthrough helper sends those commands to the controller device without the CEMU kernel driver: ```bash +sudo ./csd-passthru /dev/nvme0 admin-load-phantom 1 1000 sudo ./csd-passthru /dev/nvme0 admin-load-so 1 /host/path/csd-vadd.so csd_vadd +sudo ./csd-passthru /dev/nvme0 admin-load-ubpf 1 /host/path/csf.bpf.o csf_symbol 0 sudo ./csd-passthru /dev/nvme0 admin-activate 1 sudo ./csd-passthru /dev/nvme0 admin-deactivate 1 sudo ./csd-passthru /dev/nvme0 admin-unload 1 diff --git a/tests/femu-csd/csd-passthru.c b/tests/femu-csd/csd-passthru.c index 680fcca977b..007925b2d6e 100644 --- a/tests/femu-csd/csd-passthru.c +++ b/tests/femu-csd/csd-passthru.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -15,7 +16,6 @@ enum { CSD_ADM_COMPUTE_LOAD = 0x22, CSD_ADM_COMPUTE_ACTIVATE = 0x23, - CSD_CMD_DOWNLOAD = 0xa1, CSD_CMD_ALLOC_FDM = 0xb0, CSD_CMD_DEALLOC_AFDM = 0xc0, CSD_CMD_NVM_TO_AFDM = 0xd0, @@ -40,12 +40,10 @@ static void usage(const char *prog) " %s /dev/nvmeXnY smoke\n" " %s /dev/nvmeXnY alloc \n" " %s /dev/nvmeXnY dealloc 
\n" - " %s /dev/nvmeXnY download-phantom \n" - " %s /dev/nvmeXnY download-so [runtime-ns]\n" - " %s /dev/nvmeXnY download-ubpf [jit:0|1] [runtime-ns]\n" " %s /dev/nvmeXnY exec [runtime-ns] [group-id] [cparam1]\n" " %s /dev/nvmeXnY smoke-so \n" " %s /dev/nvmeX admin-load-so [runtime-ns]\n" + " %s /dev/nvmeX admin-load-ubpf [jit:0|1] [runtime-ns]\n" " %s /dev/nvmeX admin-load-phantom \n" " %s /dev/nvmeX admin-activate \n" " %s /dev/nvmeX admin-deactivate \n" @@ -57,7 +55,7 @@ static void usage(const char *prog) " %s /dev/nvmeXnY read \n" " %s /dev/nvmeXnY nvm-to-afdm \n", prog, prog, prog, prog, prog, prog, prog, prog, prog, prog, prog, - prog, prog, prog, prog, prog, prog, prog, prog); + prog, prog, prog, prog, prog, prog); } static uint64_t parse_u64(const char *s, const char *name) @@ -99,6 +97,39 @@ static int submit_admin(int fd, struct nvme_passthru_cmd *cmd) return ret; } +static int open_admin_from_namespace(const char *dev) +{ + char ctrl[PATH_MAX]; + char *base; + char *name; + char *ns; + int fd; + + if (strlen(dev) >= sizeof(ctrl)) { + fprintf(stderr, "device path too long: %s\n", dev); + exit(EXIT_FAILURE); + } + + strcpy(ctrl, dev); + base = strrchr(ctrl, '/'); + name = base ? 
base + 1 : ctrl; + ns = strstr(name, "nvme"); + if (ns) { + ns = strchr(ns + strlen("nvme"), 'n'); + if (ns) { + *ns = '\0'; + } + } + + fd = open(ctrl, O_RDWR); + if (fd < 0) { + perror(ctrl); + exit(EXIT_FAILURE); + } + + return fd; +} + static void csd_admin_load_program(int fd, uint16_t pind, uint8_t type, const char *path, const char *symbol, uint8_t flags, uint32_t runtime) @@ -164,59 +195,6 @@ static void csd_admin_activation(int fd, uint16_t pind, uint8_t sel) } } -static uint32_t csd_download_phantom(int fd, uint32_t runtime) -{ - struct nvme_passthru_cmd cmd = { - .opcode = CSD_CMD_DOWNLOAD, - .nsid = 1, - .cdw12 = CSD_CSF_TYPE_PHANTOM, - .cdw13 = runtime, - }; - - if (submit(fd, &cmd)) { - exit(EXIT_FAILURE); - } - - return cmd.result; -} - -static uint32_t csd_download_program(int fd, uint8_t type, const char *path, - const char *symbol, uint8_t flags, - uint32_t runtime) -{ - size_t path_len = strlen(path); - size_t symbol_len = strlen(symbol); - size_t size = path_len + symbol_len + 2; - void *buf = NULL; - struct nvme_passthru_cmd cmd = { - .opcode = CSD_CMD_DOWNLOAD, - .nsid = 1, - .data_len = size, - .cdw10 = (uint32_t)size, - .cdw11 = (uint32_t)(size >> 32), - .cdw12 = type | ((uint32_t)flags << 8), - .cdw13 = runtime, - }; - - if (posix_memalign(&buf, 4096, (size + 4095) & ~4095ULL)) { - perror("posix_memalign"); - exit(EXIT_FAILURE); - } - - memset(buf, 0, (size + 4095) & ~4095ULL); - memcpy(buf, path, path_len); - memcpy((char *)buf + path_len + 1, symbol, symbol_len); - cmd.addr = (uintptr_t)buf; - - if (submit(fd, &cmd)) { - free(buf); - exit(EXIT_FAILURE); - } - - free(buf); - return cmd.result; -} - static void csd_exec(int fd, uint32_t csf_id, uint32_t in_afdm_id, uint32_t out_afdm_id, uint32_t runtime, uint32_t group_id, uint32_t cparam1) @@ -384,14 +362,15 @@ static void dump_hex(const uint8_t *buf, size_t size) } } -static void run_smoke(int fd) +static void run_smoke(const char *dev, int fd) { const char *msg = "femu-csd-afdm-smoke"; 
size_t msg_len = strlen(msg) + 1; uint8_t *write_buf = NULL; uint8_t *read_buf = NULL; uint32_t id; - uint32_t csf_id; + uint16_t csf_id = 1; + int admin_fd; if (posix_memalign((void **)&write_buf, 4096, 4096) || posix_memalign((void **)&read_buf, 4096, 4096)) { @@ -414,9 +393,15 @@ static void run_smoke(int fd) exit(EXIT_FAILURE); } - csf_id = csd_download_phantom(fd, 1000); - printf("downloaded phantom CSF id=%" PRIu32 "\n", csf_id); + admin_fd = open_admin_from_namespace(dev); + csd_admin_load_program(admin_fd, csf_id, CSD_CSF_TYPE_PHANTOM, + NULL, NULL, 0, 1000); + csd_admin_activation(admin_fd, csf_id, 1); + printf("loaded phantom CSF id=%" PRIu16 "\n", csf_id); csd_exec(fd, csf_id, id, id, 0, 0, 0); + csd_admin_activation(admin_fd, csf_id, 0); + csd_admin_unload_program(admin_fd, csf_id); + close(admin_fd); printf("phantom exec passed\n"); csd_dealloc(fd, id); @@ -426,14 +411,15 @@ static void run_smoke(int fd) free(read_buf); } -static void run_so_smoke(int fd, const char *so_path) +static void run_so_smoke(const char *dev, int fd, const char *so_path) { enum { COUNT = 1024 }; int *input = NULL; int *output = NULL; uint32_t in_id; uint32_t out_id; - uint32_t csf_id; + uint16_t csf_id = 1; + int admin_fd; if (posix_memalign((void **)&input, 4096, 8192) || posix_memalign((void **)&output, 4096, 4096)) { @@ -452,9 +438,11 @@ static void run_so_smoke(int fd, const char *so_path) csd_write(fd, in_id, 0, input, 8192); csd_write(fd, out_id, 0, output, 4096); - csf_id = csd_download_program(fd, CSD_CSF_TYPE_SHARED_LIB, so_path, - "csd_vadd", 0, 0); - printf("downloaded shared-library CSF id=%" PRIu32 "\n", csf_id); + admin_fd = open_admin_from_namespace(dev); + csd_admin_load_program(admin_fd, csf_id, CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_vadd", 0, 0); + csd_admin_activation(admin_fd, csf_id, 1); + printf("loaded shared-library CSF id=%" PRIu16 "\n", csf_id); csd_exec(fd, csf_id, in_id, out_id, 0, 0, COUNT); csd_read(fd, out_id, 0, output, 4096); @@ -470,6 +458,9 
@@ static void run_so_smoke(int fd, const char *so_path) csd_dealloc(fd, in_id); csd_dealloc(fd, out_id); + csd_admin_activation(admin_fd, csf_id, 0); + csd_admin_unload_program(admin_fd, csf_id); + close(admin_fd); printf("shared-library smoke passed\n"); free(input); @@ -496,13 +487,13 @@ int main(int argc, char **argv) } if (!strcmp(op, "smoke")) { - run_smoke(fd); + run_smoke(dev, fd); } else if (!strcmp(op, "smoke-so")) { if (argc != 4) { usage(argv[0]); return EXIT_FAILURE; } - run_so_smoke(fd, argv[3]); + run_so_smoke(dev, fd, argv[3]); } else if (!strcmp(op, "alloc")) { uint64_t size; uint32_t id; @@ -520,60 +511,36 @@ int main(int argc, char **argv) return EXIT_FAILURE; } csd_dealloc(fd, (uint32_t)parse_u64(argv[3], "id")); - } else if (!strcmp(op, "download-phantom")) { - uint32_t id; - - if (argc != 4) { - usage(argv[0]); - return EXIT_FAILURE; - } - id = csd_download_phantom(fd, (uint32_t)parse_u64(argv[3], "runtime-ns")); - printf("%" PRIu32 "\n", id); - } else if (!strcmp(op, "download-so")) { - uint32_t runtime = 0; - uint32_t id; - - if (argc < 5 || argc > 6) { - usage(argv[0]); - return EXIT_FAILURE; - } - if (argc == 6) { - runtime = (uint32_t)parse_u64(argv[5], "runtime-ns"); - } - id = csd_download_program(fd, CSD_CSF_TYPE_SHARED_LIB, argv[3], - argv[4], 0, runtime); - printf("%" PRIu32 "\n", id); - } else if (!strcmp(op, "download-ubpf")) { + } else if (!strcmp(op, "admin-load-so")) { uint32_t runtime = 0; - uint8_t jit = 0; - uint32_t id; - if (argc < 5 || argc > 7) { + if (argc < 6 || argc > 7) { usage(argv[0]); return EXIT_FAILURE; } - if (argc >= 6) { - jit = (uint8_t)parse_u64(argv[5], "jit"); - } if (argc == 7) { runtime = (uint32_t)parse_u64(argv[6], "runtime-ns"); } - id = csd_download_program(fd, CSD_CSF_TYPE_EBPF, argv[3], argv[4], - jit ? 
1 : 0, runtime); - printf("%" PRIu32 "\n", id); - } else if (!strcmp(op, "admin-load-so")) { + csd_admin_load_program(fd, (uint16_t)parse_u64(argv[3], "pind"), + CSD_CSF_TYPE_SHARED_LIB, argv[4], argv[5], + 0, runtime); + } else if (!strcmp(op, "admin-load-ubpf")) { uint32_t runtime = 0; + uint8_t jit = 0; - if (argc < 6 || argc > 7) { + if (argc < 6 || argc > 8) { usage(argv[0]); return EXIT_FAILURE; } - if (argc == 7) { - runtime = (uint32_t)parse_u64(argv[6], "runtime-ns"); + if (argc >= 7) { + jit = (uint8_t)parse_u64(argv[6], "jit"); + } + if (argc == 8) { + runtime = (uint32_t)parse_u64(argv[7], "runtime-ns"); } csd_admin_load_program(fd, (uint16_t)parse_u64(argv[3], "pind"), - CSD_CSF_TYPE_SHARED_LIB, argv[4], argv[5], - 0, runtime); + CSD_CSF_TYPE_EBPF, argv[4], argv[5], + jit ? 1 : 0, runtime); } else if (!strcmp(op, "admin-load-phantom")) { if (argc != 5) { usage(argv[0]); From be56d81a5fd86f88d3b24b3df574968b5ee06a9e Mon Sep 17 00:00:00 2001 From: Emilio597 <857005703@qq.com> Date: Tue, 26 May 2026 09:50:42 +0800 Subject: [PATCH 07/10] hw/femu: check CSD NVM copy against FTL map --- hw/femu/csd/csd.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/hw/femu/csd/csd.c b/hw/femu/csd/csd.c index 7cd816de0bb..248012c5103 100644 --- a/hw/femu/csd/csd.c +++ b/hw/femu/csd/csd.c @@ -276,6 +276,43 @@ static uint16_t csd_parse_program(FemuCsdProgram *program, const char **path, return NVME_SUCCESS; } +static uint16_t csd_check_nvm_ftl_range(FemuCtrl *n, uint64_t slba, + uint64_t nlb, uint64_t *mapped_pages) +{ + struct ssd *ssd = n->ssd; + struct ssdparams *spp; + uint64_t start_lpn; + uint64_t end_lpn; + + if (!ssd || !ssd->maptbl) { + return NVME_INTERNAL_DEV_ERROR | NVME_DNR; + } + + spp = &ssd->sp; + if (spp->secs_per_pg <= 0 || spp->tt_pgs == 0 || nlb == 0) { + return NVME_INTERNAL_DEV_ERROR | NVME_DNR; + } + + start_lpn = slba / spp->secs_per_pg; + if (slba > UINT64_MAX - nlb + 1) { + return 
NVME_LBA_RANGE | NVME_DNR; + } + + end_lpn = (slba + nlb - 1) / spp->secs_per_pg; + if (end_lpn >= spp->tt_pgs) { + return NVME_LBA_RANGE | NVME_DNR; + } + + *mapped_pages = 0; + for (uint64_t lpn = start_lpn; lpn <= end_lpn; lpn++) { + if (ssd->maptbl[lpn].ppa != UNMAPPED_PPA) { + (*mapped_pages)++; + } + } + + return NVME_SUCCESS; +} + static uint16_t csd_load_shared_lib(FemuCsdProgram *program) { const char *path; @@ -833,14 +870,21 @@ static uint16_t csd_nvm_to_afdm(FemuCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, uint64_t nlb = le16_to_cpu(copy->nlb) + 1; uint64_t size = nlb << data_shift; uint64_t nvm_offset = slba << data_shift; + uint64_t mapped_pages; FemuCsdAfdm *afdm; uint16_t status; - if (slba + nlb > le64_to_cpu(ns->id_ns.nsze) || + if (slba > le64_to_cpu(ns->id_ns.nsze) || + nlb > le64_to_cpu(ns->id_ns.nsze) - slba || nvm_offset > n->mbe->size || size > n->mbe->size - nvm_offset) { return NVME_LBA_RANGE | NVME_DNR; } + status = csd_check_nvm_ftl_range(n, slba, nlb, &mapped_pages); + if (status) { + return status; + } + qemu_mutex_lock(&csd->lock); afdm = csd_get_afdm_locked(csd, id); status = csd_check_afdm_range(afdm, offset, size); @@ -855,6 +899,10 @@ static uint16_t csd_nvm_to_afdm(FemuCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, } req->cqe.n.result = size; + if (mapped_pages) { + req->reqlat += n->ssd->sp.pg_rd_lat; + req->expire_time += n->ssd->sp.pg_rd_lat; + } return NVME_SUCCESS; } From a0a9735919d3b3bd6f210f01b05a38b8860c5708 Mon Sep 17 00:00:00 2001 From: Emilio597 <857005703@qq.com> Date: Tue, 26 May 2026 11:28:21 +0800 Subject: [PATCH 08/10] tests/femu-csd: expand shared library and latency coverage --- README.md | 7 +- femu-scripts/run-csd.sh | 8 + hw/femu/csd/csd.c | 163 +++++++++---- hw/femu/csd/csd.h | 27 ++- tests/femu-csd/Makefile | 7 +- tests/femu-csd/README.md | 30 ++- tests/femu-csd/csd-original-kernels.cc | 127 ++++++++++ tests/femu-csd/csd-passthru.c | 307 ++++++++++++++++++++++--- tests/femu-csd/csd-vadd.c | 7 + 9 files changed, 
601 insertions(+), 82 deletions(-) create mode 100644 tests/femu-csd/csd-original-kernels.cc diff --git a/README.md b/README.md index 85035ba6809..2ec27b2434a 100644 --- a/README.md +++ b/README.md @@ -385,13 +385,18 @@ nr_thread=4 # Number of functional simulation threads time_slice=200000 # Scheduler time slice (ns) context_switch_time=200 # Context switch time (ns) csf_runtime_scale=3 # Runtime scaling factor +pg_rd_lat=40000 # NAND page read latency (ns), used by NVM-to-AFDM +pg_wr_lat=200000 # NAND page program latency (ns) +blk_er_lat=2000000 # NAND block erase latency (ns) +ch_xfer_lat=0 # Channel transfer latency (ns) ``` **Current Scope:** - Normal NVMe read/write through the device-side BBSSD FTL path in CSD mode - Vendor commands for AFDM allocation, read/write, NVM-to-AFDM copy - Phantom and shared-library CSF load/execute path using the original CEMU - lifecycle and `path\0symbol\0` program descriptor format + lifecycle, `path\0symbol\0` program descriptor format, and program execute + fields (`pind`, `numr`, `dlen`, `cparam1`, `cparam2`, `group`, `runtime`) - CEMU-style admin commands for CSF load/unload and activate/deactivate - Optional uBPF CSF support via `./femu-compile.sh --enable-csd-ubpf` - Group/QoS command metadata diff --git a/femu-scripts/run-csd.sh b/femu-scripts/run-csd.sh index 8dc7fa98e3c..42e69d8d3b3 100755 --- a/femu-scripts/run-csd.sh +++ b/femu-scripts/run-csd.sh @@ -14,6 +14,10 @@ NR_THREAD=4 TIME_SLICE=200000 CONTEXT_SWITCH_TIME=200 CSF_RUNTIME_SCALE=3 +PG_RD_LAT=40000 +PG_WR_LAT=200000 +BLK_ER_LAT=2000000 +CH_XFER_LAT=0 #----------------------------------------------------------------------- @@ -28,6 +32,10 @@ FEMU_OPTIONS=${FEMU_OPTIONS}",nr_thread=${NR_THREAD}" FEMU_OPTIONS=${FEMU_OPTIONS}",time_slice=${TIME_SLICE}" FEMU_OPTIONS=${FEMU_OPTIONS}",context_switch_time=${CONTEXT_SWITCH_TIME}" FEMU_OPTIONS=${FEMU_OPTIONS}",csf_runtime_scale=${CSF_RUNTIME_SCALE}" +FEMU_OPTIONS=${FEMU_OPTIONS}",pg_rd_lat=${PG_RD_LAT}" 
+FEMU_OPTIONS=${FEMU_OPTIONS}",pg_wr_lat=${PG_WR_LAT}" +FEMU_OPTIONS=${FEMU_OPTIONS}",blk_er_lat=${BLK_ER_LAT}" +FEMU_OPTIONS=${FEMU_OPTIONS}",ch_xfer_lat=${CH_XFER_LAT}" echo ${FEMU_OPTIONS} diff --git a/hw/femu/csd/csd.c b/hw/femu/csd/csd.c index 248012c5103..dc129137efc 100644 --- a/hw/femu/csd/csd.c +++ b/hw/femu/csd/csd.c @@ -11,6 +11,8 @@ typedef int64_t (*FemuCsdSharedLibFn)(FemuCsdArgs *args); +#define CSD_EXEC_DATA_MAX (1U << 20) + typedef struct FemuCsdAfdm { uint32_t id; uint64_t size; @@ -63,6 +65,7 @@ static void csd_check_size(void) QEMU_BUILD_BUG_ON(sizeof(NvmeCsdDeallocAfdmCmd) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeCsdNvmToAfdmCmd) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeCsdExecCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdMemoryRange) != 32); QEMU_BUILD_BUG_ON(sizeof(NvmeCsdReadAfdmCmd) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeCsdWriteAfdmCmd) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeCsdCreateGroupCmd) != 64); @@ -527,90 +530,157 @@ static uint16_t csd_compute_activate(FemuCtrl *n, NvmeCmd *cmd) return NVME_SUCCESS; } +static uint16_t csd_build_exec_args_locked(FemuCsdState *csd, + NvmeCsdMemoryRange *ranges, + uint32_t numr, + FemuCsdArgs *args, + void ***mr_addrp, + long long **mr_lenp) +{ + void **mr_addr = g_new0(void *, numr); + long long *mr_len = g_new0(long long, numr); + + for (uint32_t i = 0; i < numr; i++) { + uint32_t nsid = le32_to_cpu(ranges[i].nsid); + uint32_t len = le32_to_cpu(ranges[i].len); + uint64_t sb = le64_to_cpu(ranges[i].sb); + FemuCsdAfdm *afdm; + + if (nsid != NVME_CSD_MR_AFDM_NSID) { + g_free(mr_addr); + g_free(mr_len); + return NVME_INVALID_FIELD | NVME_DNR; + } + + afdm = csd_get_afdm_locked(csd, sb); + if (!afdm) { + g_free(mr_addr); + g_free(mr_len); + return NVME_INVALID_FIELD | NVME_DNR; + } + if (len == 0) { + len = afdm->size > UINT32_MAX ? 
UINT32_MAX : afdm->size; + } + if (len > afdm->size) { + g_free(mr_addr); + g_free(mr_len); + return NVME_INVALID_FIELD | NVME_DNR; + } + + mr_addr[i] = afdm->data; + mr_len[i] = len; + } + + args->numr = numr; + args->mr_addr = mr_addr; + args->mr_len = mr_len; + *mr_addrp = mr_addr; + *mr_lenp = mr_len; + + return NVME_SUCCESS; +} + static uint16_t csd_exec(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) { FemuCsdState *csd = csd_state(n); NvmeCsdExecCmd *exec = (NvmeCsdExecCmd *)cmd; - uint32_t csf_id = le32_to_cpu(exec->csf_id); - uint32_t in_id = le32_to_cpu(exec->in_afdm_id); - uint32_t out_id = le32_to_cpu(exec->out_afdm_id); - uint32_t group_id = le32_to_cpu(exec->group); - uint32_t cparam1 = le32_to_cpu(exec->cparam1); + uint16_t pind = le16_to_cpu(exec->pind); + uint16_t rsid = le16_to_cpu(exec->rsid); + uint32_t numr = le32_to_cpu(exec->numr); + uint32_t dlen = le32_to_cpu(exec->dlen); + uint64_t cparam1 = le64_to_cpu(exec->cparam1); + uint64_t cparam2 = le64_to_cpu(exec->cparam2); + uint32_t group_id = exec->group; uint32_t runtime = le32_to_cpu(exec->runtime); + uint64_t prp1 = le64_to_cpu(exec->prp1); + uint64_t prp2 = le64_to_cpu(exec->prp2); FemuCsdProgram *program; - FemuCsdAfdm *in = NULL; - FemuCsdAfdm *out = NULL; uint64_t copy_size; - void *mr_addr[2] = { 0 }; - long long mr_len[2] = { 0 }; + uint8_t *data = NULL; + NvmeCsdMemoryRange *ranges = NULL; + void **mr_addr = NULL; + long long *mr_len = NULL; FemuCsdArgs args = { 0 }; int64_t result = 0; uint16_t status = NVME_SUCCESS; + if (dlen == 0 && numr > 0) { + dlen = numr * sizeof(NvmeCsdMemoryRange); + } + + if (pind == 0 || rsid != 0 || numr == 0 || + numr > CSD_EXEC_DATA_MAX / sizeof(NvmeCsdMemoryRange)) { + return NVME_INVALID_FIELD | NVME_DNR; + } + if (dlen < numr * sizeof(NvmeCsdMemoryRange) || dlen > CSD_EXEC_DATA_MAX) { + return NVME_INVALID_FIELD | NVME_DNR; + } + + data = g_malloc0(dlen); + status = dma_write_prp(n, data, dlen, prp1, prp2); + if (status) { + g_free(data); + return 
status; + } + ranges = (NvmeCsdMemoryRange *)data; + qemu_mutex_lock(&csd->lock); - program = csd_get_program_locked(csd, csf_id); + program = csd_get_program_locked(csd, pind); if (!program) { qemu_mutex_unlock(&csd->lock); - return NVME_INVALID_FIELD | NVME_DNR; + status = NVME_INVALID_FIELD | NVME_DNR; + goto out; } if (!program->active) { qemu_mutex_unlock(&csd->lock); - return NVME_INVALID_FIELD | NVME_DNR; + status = NVME_INVALID_FIELD | NVME_DNR; + goto out; } if (group_id != 0 && !csd_get_group_locked(csd, group_id)) { qemu_mutex_unlock(&csd->lock); - return NVME_INVALID_FIELD | NVME_DNR; - } - - in = in_id ? csd_get_afdm_locked(csd, in_id) : NULL; - out = out_id ? csd_get_afdm_locked(csd, out_id) : NULL; - if ((in_id && !in) || (out_id && !out)) { - qemu_mutex_unlock(&csd->lock); - return NVME_INVALID_FIELD | NVME_DNR; + status = NVME_INVALID_FIELD | NVME_DNR; + goto out; } if (runtime == 0) { runtime = program->runtime; } + status = csd_build_exec_args_locked(csd, ranges, numr, &args, + &mr_addr, &mr_len); + if (status) { + qemu_mutex_unlock(&csd->lock); + goto out; + } + args.cparam1 = cparam1; + args.cparam2 = cparam2; + args.data_buffer = dlen > numr * sizeof(NvmeCsdMemoryRange) ? + data + numr * sizeof(NvmeCsdMemoryRange) : NULL; + args.buffer_len = args.data_buffer ? + dlen - numr * sizeof(NvmeCsdMemoryRange) : 0; + switch (program->type) { case NVME_CSD_CSF_TYPE_PHANTOM: - if (in && out) { - copy_size = MIN(in->size, out->size); - memcpy(out->data, in->data, copy_size); - result = copy_size > UINT32_MAX ? UINT32_MAX : copy_size; + if (args.numr >= 2) { + copy_size = MIN(args.mr_len[0], args.mr_len[1]); + memcpy(args.mr_addr[0], args.mr_addr[1], copy_size); + result = copy_size > INT64_MAX ? 
INT64_MAX : copy_size; } break; case NVME_CSD_CSF_TYPE_SHARED_LIB: - if (!program->shared_lib_fn || !in || !out) { + if (!program->shared_lib_fn) { status = NVME_INVALID_FIELD | NVME_DNR; break; } - mr_addr[0] = out->data; - mr_addr[1] = in->data; - mr_len[0] = out->size; - mr_len[1] = in->size; - args.numr = 2; - args.mr_addr = mr_addr; - args.mr_len = mr_len; - args.cparam1 = cparam1; result = program->shared_lib_fn(&args); break; case NVME_CSD_CSF_TYPE_EBPF: #ifdef CONFIG_FEMU_CSD_UBPF - if (!program->ubpf_vm || !in || !out) { + if (!program->ubpf_vm) { status = NVME_INVALID_FIELD | NVME_DNR; break; } - mr_addr[0] = out->data; - mr_addr[1] = in->data; - mr_len[0] = out->size; - mr_len[1] = in->size; - args.numr = 2; - args.mr_addr = mr_addr; - args.mr_len = mr_len; - args.cparam1 = cparam1; if (program->ubpf_jit_fn) { result = program->ubpf_jit_fn(&args, sizeof(args)); } else { @@ -634,10 +704,12 @@ static uint16_t csd_exec(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) if (!status) { req->cqe.n.result = result > UINT32_MAX ? 
UINT32_MAX : result; } + g_free(mr_addr); + g_free(mr_len); qemu_mutex_unlock(&csd->lock); if (status) { - return status; + goto out; } if (runtime) { @@ -645,7 +717,9 @@ static uint16_t csd_exec(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) req->expire_time += runtime; } - return NVME_SUCCESS; +out: + g_free(data); + return status; } static uint16_t csd_normalize_prio(int8_t *prio) @@ -940,6 +1014,7 @@ static uint16_t csd_admin_cmd(FemuCtrl *n, NvmeCmd *cmd) { switch (cmd->opcode) { case NVME_ADM_CMD_CSD_COMPUTE_LOAD: + case NVME_ADM_CMD_CSD_COMPUTE_LOAD_DATA: return csd_compute_load(n, cmd); case NVME_ADM_CMD_CSD_COMPUTE_ACTIVATE: return csd_compute_activate(n, cmd); diff --git a/hw/femu/csd/csd.h b/hw/femu/csd/csd.h index 20d76f4a928..5272f811ee7 100644 --- a/hw/femu/csd/csd.h +++ b/hw/femu/csd/csd.h @@ -7,7 +7,7 @@ enum FemuCsdIoCommands { NVME_CMD_CSD_ALLOC_FDM = 0xb0, NVME_CMD_CSD_DEALLOC_AFDM = 0xc0, NVME_CMD_CSD_NVM_TO_AFDM = 0xd0, - NVME_CMD_CSD_EXEC = 0xe0, + NVME_CMD_CSD_EXEC = 0xe1, NVME_CMD_CSD_READ_AFDM = 0xf2, NVME_CMD_CSD_WRITE_AFDM = 0xf5, NVME_CMD_CSD_CREATE_GROUP = 0xf6, @@ -18,8 +18,11 @@ enum FemuCsdIoCommands { enum FemuCsdAdminCommands { NVME_ADM_CMD_CSD_COMPUTE_LOAD = 0x22, NVME_ADM_CMD_CSD_COMPUTE_ACTIVATE = 0x23, + NVME_ADM_CMD_CSD_COMPUTE_LOAD_DATA = 0x25, }; +#define NVME_CSD_MR_AFDM_NSID 0 + enum FemuCsdFdmType { NVME_CSD_FDM_TYPE_HOST = 0, }; @@ -116,17 +119,27 @@ typedef struct QEMU_PACKED NvmeCsdExecCmd { uint8_t flags; uint16_t cid; uint32_t nsid; - uint64_t rsvd2[2]; + uint16_t pind; + uint16_t rsid; + uint32_t numr; + uint32_t dlen; + uint32_t rsvd4; uint64_t prp1; uint64_t prp2; - uint32_t csf_id; - uint32_t in_afdm_id; - uint32_t out_afdm_id; - uint32_t group; - uint32_t cparam1; + uint64_t cparam1; + uint64_t cparam2; + uint32_t group:8; + uint32_t chunk_nlb:24; uint32_t runtime; } NvmeCsdExecCmd; +typedef struct QEMU_PACKED NvmeCsdMemoryRange { + uint32_t nsid; + uint32_t len; + uint64_t sb; + uint64_t rsvd[2]; +} 
NvmeCsdMemoryRange; + typedef struct FemuCsdArgs { int numr; void **mr_addr; diff --git a/tests/femu-csd/Makefile b/tests/femu-csd/Makefile index 48e0a532c6e..8e990b16cec 100644 --- a/tests/femu-csd/Makefile +++ b/tests/femu-csd/Makefile @@ -1,8 +1,10 @@ CC ?= gcc +CXX ?= c++ CFLAGS ?= -Wall -Wextra -O2 -g SO_CFLAGS ?= -Wall -Wextra -O2 -g -fPIC +CXX_SO_CFLAGS ?= -Wall -Wextra -O2 -g -fPIC -TARGETS := csd-passthru csd-vadd.so +TARGETS := csd-passthru csd-vadd.so csd-original-kernels.so .PHONY: all clean @@ -14,5 +16,8 @@ csd-passthru: csd-passthru.c csd-vadd.so: csd-vadd.c femu-csd-kernel.h $(CC) $(SO_CFLAGS) -shared -o $@ $< +csd-original-kernels.so: csd-original-kernels.cc femu-csd-kernel.h + $(CXX) $(CXX_SO_CFLAGS) -shared -o $@ $< -pthread + clean: rm -f $(TARGETS) diff --git a/tests/femu-csd/README.md b/tests/femu-csd/README.md index 106a92de60f..8384a0e7c1d 100644 --- a/tests/femu-csd/README.md +++ b/tests/femu-csd/README.md @@ -36,14 +36,28 @@ inside that descriptor must be visible to the host QEMU process: sudo ./csd-passthru /dev/nvme0n1 smoke-so /home//FEMU/tests/femu-csd/csd-vadd.so ``` +`make` also builds `csd-original-kernels.so`, which contains small +shared-library ports of the original CEMU `knn`, `sql`, and `grep` kernels. +These tests exercise the same CSD program lifecycle and inline memory range +interface as the vadd test: + +```bash +sudo ./csd-passthru /dev/nvme0n1 smoke-so-all /home//FEMU/tests/femu-csd/csd-original-kernels.so +``` + The shared-library CSF ABI is: ```c int64_t kernel(struct femu_csd_args *args); ``` -For the direct AFDM execution path, `args->mr_addr[0]` is the output AFDM and -`args->mr_addr[1]` is the input AFDM. +The execute command uses a CEMU-style program execute command body: +`pind`, `numr`, `dlen`, `cparam1`, `cparam2`, `group`, and `runtime` are sent +in the command. Because this lightweight test path intentionally avoids MRS and +FDMFS, it sends inline memory ranges in the PRP data buffer. 
In those test +ranges, `nsid=0` means AFDM, `sb` is the AFDM id, and `len=0` means the full +AFDM allocation. The CSF ABI then sees `args->mr_addr[0]` as the output AFDM +and `args->mr_addr[1]` as the input AFDM. Other useful command-level checks: @@ -51,11 +65,17 @@ Other useful command-level checks: sudo ./csd-passthru /dev/nvme0n1 alloc 4096 sudo ./csd-passthru /dev/nvme0n1 create-group 5 0 0 sudo ./csd-passthru /dev/nvme0n1 set-qos 6 0 0 -sudo ./csd-passthru /dev/nvme0n1 exec 0 +sudo ./csd-passthru /dev/nvme0n1 exec 0 sudo ./csd-passthru /dev/nvme0n1 delete-group sudo ./csd-passthru /dev/nvme0n1 nvm-to-afdm 0 0 0 +sudo ./csd-passthru /dev/nvme0n1 bench 4096 32 +sudo ./csd-passthru /dev/nvme0n1 bench 65536 16 ``` +The `bench` command reports wall-clock average latency for AFDM write, AFDM +read, and NVM-to-AFDM copy. It is intended as a regression check for the CSD +command path, not a final paper-level benchmark harness. + FEMU CSD also accepts the original CEMU program lifecycle admin command layouts for load/unload (`0x22`) and activate/deactivate (`0x23`). The lightweight passthrough helper sends those commands to the controller device @@ -87,3 +107,7 @@ is optional because it depends on an external `ubpf` library. Build FEMU with: ```bash ./femu-compile.sh --enable-csd-ubpf ``` + +The original CEMU LZ4 and uBPF tests are not part of the default smoke suite: +LZ4 requires an additional host dependency, and uBPF requires FEMU to be built +with `--enable-csd-ubpf`. 
diff --git a/tests/femu-csd/csd-original-kernels.cc b/tests/femu-csd/csd-original-kernels.cc new file mode 100644 index 00000000000..63eb53b3111 --- /dev/null +++ b/tests/femu-csd/csd-original-kernels.cc @@ -0,0 +1,127 @@ +#include +#include +#include +#include + +#include "femu-csd-kernel.h" + +struct KnnNode { + char tag[64]; + char vector[4096]; +}; + +static int knn_distance(const int *query, const char *vector) +{ + int distance = 0; + + for (size_t i = 0; i < 4096; ++i) { + int diff = query[i] - (vector[i] - '0'); + + distance += diff * diff; + } + + return distance; +} + +static void knn_chunk(const KnnNode *nodes, const int *query, + size_t start, size_t end, int *distances) +{ + for (size_t i = start; i < end; ++i) { + distances[i] = knn_distance(query, nodes[i].vector); + } +} + +extern "C" long long csd_knn(struct femu_csd_args *args) +{ + if (args->numr < 2) { + return -1; + } + + const KnnNode *nodes = static_cast(args->mr_addr[0]); + int *output = static_cast(args->mr_addr[1]); + size_t nr_vector = args->mr_len[0] / static_cast(sizeof(KnnNode)); + int query[4096] = { 0 }; + size_t nr_threads = std::min(2, std::max(1, nr_vector)); + size_t chunk = (nr_vector + nr_threads - 1) / nr_threads; + std::vector threads; + + for (size_t t = 0; t < nr_threads; ++t) { + size_t start = t * chunk; + size_t end = std::min(start + chunk, nr_vector); + + if (start < end) { + threads.emplace_back(knn_chunk, nodes, query, start, end, output); + } + } + + for (auto &thread : threads) { + thread.join(); + } + + return nr_vector; +} + +static long long sql_query_records(const char *data, size_t start, size_t end, + int year_lower, int year_upper, char *output) +{ + static constexpr int record_length = 32; + long long output_size = 0; + + for (size_t i = start; i + record_length <= end; i += record_length) { + const char *record = data + i; + int year = ((record[30] - '0') << 8) | static_cast(record[31] - '0'); + + if (year >= year_lower && year <= year_upper) { + 
memcpy(output + output_size, record, record_length); + output_size += record_length; + } + } + + return output_size; +} + +extern "C" long long csd_sql(struct femu_csd_args *args) +{ + if (args->numr < 2) { + return -1; + } + + const char *data = static_cast(args->mr_addr[0]); + char *output = static_cast(args->mr_addr[1]); + int year_lower = args->cparam1 ? args->cparam1 : 50; + int year_upper = args->cparam2 ? args->cparam2 : 60; + + return sql_query_records(data, 0, args->mr_len[0], year_lower, year_upper, output); +} + +static long long grep_rows(const char *data, int rows, int cols, const char *pattern) +{ + int pattern_length = strlen(pattern); + long long matches = 0; + + for (int r = 0; r < rows; ++r) { + const char *line = data + r * cols; + + for (int c = 0; c <= cols - pattern_length; ++c) { + if (!strncmp(line + c, pattern, pattern_length)) { + matches++; + } + } + } + + return matches * 8; +} + +extern "C" long long csd_grep(struct femu_csd_args *args) +{ + if (args->numr < 2) { + return -1; + } + + const char *data = static_cast(args->mr_addr[0]); + const char *pattern = static_cast(args->mr_addr[1]); + int cols = args->cparam2 ? args->cparam2 : 1024; + int rows = args->cparam1 ? 
args->cparam1 : args->mr_len[0] / cols; + + return grep_rows(data, rows, cols, pattern); +} diff --git a/tests/femu-csd/csd-passthru.c b/tests/femu-csd/csd-passthru.c index 007925b2d6e..9140604d262 100644 --- a/tests/femu-csd/csd-passthru.c +++ b/tests/femu-csd/csd-passthru.c @@ -11,15 +11,17 @@ #include #include #include +#include #include enum { CSD_ADM_COMPUTE_LOAD = 0x22, + CSD_ADM_COMPUTE_LOAD_DATA = 0x25, CSD_ADM_COMPUTE_ACTIVATE = 0x23, CSD_CMD_ALLOC_FDM = 0xb0, CSD_CMD_DEALLOC_AFDM = 0xc0, CSD_CMD_NVM_TO_AFDM = 0xd0, - CSD_CMD_EXEC = 0xe0, + CSD_CMD_EXEC = 0xe1, CSD_CMD_READ_AFDM = 0xf2, CSD_CMD_WRITE_AFDM = 0xf5, CSD_CMD_CREATE_GROUP = 0xf6, @@ -33,6 +35,36 @@ enum { CSD_CSF_TYPE_SHARED_LIB = 3, }; +enum { + CSD_MR_AFDM_NSID = 0, +}; + +struct csd_memory_range { + uint32_t nsid; + uint32_t len; + uint64_t sb; + uint64_t rsvd[2]; +} __attribute__((packed)); + +struct csd_program_execute_cmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint16_t pind; + uint16_t rsid; + uint32_t numr; + uint32_t dlen; + uint32_t rsvd; + uint64_t prp1; + uint64_t prp2; + uint64_t cparam1; + uint64_t cparam2; + uint32_t group:8; + uint32_t chunk_nlb:24; + uint32_t runtime; +} __attribute__((packed)); + static void usage(const char *prog) { fprintf(stderr, @@ -40,8 +72,10 @@ static void usage(const char *prog) " %s /dev/nvmeXnY smoke\n" " %s /dev/nvmeXnY alloc \n" " %s /dev/nvmeXnY dealloc \n" - " %s /dev/nvmeXnY exec [runtime-ns] [group-id] [cparam1]\n" + " %s /dev/nvmeXnY exec [runtime-ns] [group-id] [cparam1] [cparam2]\n" " %s /dev/nvmeXnY smoke-so \n" + " %s /dev/nvmeXnY smoke-so-all \n" + " %s /dev/nvmeXnY bench \n" " %s /dev/nvmeX admin-load-so [runtime-ns]\n" " %s /dev/nvmeX admin-load-ubpf [jit:0|1] [runtime-ns]\n" " %s /dev/nvmeX admin-load-phantom \n" @@ -55,7 +89,15 @@ static void usage(const char *prog) " %s /dev/nvmeXnY read \n" " %s /dev/nvmeXnY nvm-to-afdm \n", prog, prog, prog, prog, prog, prog, prog, prog, prog, prog, prog, - prog, 
prog, prog, prog, prog, prog); + prog, prog, prog, prog, prog, prog, prog, prog); +} + +static uint64_t monotonic_ns(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec; } static uint64_t parse_u64(const char *s, const char *name) @@ -81,8 +123,13 @@ static int submit(int fd, struct nvme_passthru_cmd *cmd) perror("NVME_IOCTL_IO_CMD"); return -1; } + if (ret > 0) { + fprintf(stderr, "NVME_IOCTL_IO_CMD status=0x%x result=0x%x\n", + ret, cmd->result); + return -1; + } - return ret; + return 0; } static int submit_admin(int fd, struct nvme_passthru_cmd *cmd) @@ -93,8 +140,13 @@ static int submit_admin(int fd, struct nvme_passthru_cmd *cmd) perror("NVME_IOCTL_ADMIN_CMD"); return -1; } + if (ret > 0) { + fprintf(stderr, "NVME_IOCTL_ADMIN_CMD status=0x%x result=0x%x\n", + ret, cmd->result); + return -1; + } - return ret; + return 0; } static int open_admin_from_namespace(const char *dev) @@ -140,7 +192,7 @@ static void csd_admin_load_program(int fd, uint16_t pind, uint8_t type, void *buf = NULL; uint32_t cdw10 = pind | ((uint32_t)type << 16); struct nvme_passthru_cmd cmd = { - .opcode = CSD_ADM_COMPUTE_LOAD, + .opcode = size ? 
CSD_ADM_COMPUTE_LOAD_DATA : CSD_ADM_COMPUTE_LOAD, .nsid = 1, .data_len = size, .cdw2 = ((uint32_t)flags & 0x1), @@ -195,24 +247,53 @@ static void csd_admin_activation(int fd, uint16_t pind, uint8_t sel) } } -static void csd_exec(int fd, uint32_t csf_id, uint32_t in_afdm_id, - uint32_t out_afdm_id, uint32_t runtime, - uint32_t group_id, uint32_t cparam1) +static uint32_t csd_exec_ranges(int fd, uint32_t pind, uint32_t mr0_afdm_id, + uint32_t mr1_afdm_id, uint32_t runtime, + uint32_t group_id, uint64_t cparam1, + uint64_t cparam2) { - struct nvme_passthru_cmd cmd = { - .opcode = CSD_CMD_EXEC, - .nsid = 1, - .cdw10 = csf_id, - .cdw11 = in_afdm_id, - .cdw12 = out_afdm_id, - .cdw13 = group_id, - .cdw14 = cparam1, - .cdw15 = runtime, + struct nvme_passthru_cmd cmd = { 0 }; + struct csd_program_execute_cmd *exec = + (struct csd_program_execute_cmd *)&cmd; + struct csd_memory_range ranges[2] = { + { + .nsid = CSD_MR_AFDM_NSID, + .len = 0, + .sb = mr0_afdm_id, + }, + { + .nsid = CSD_MR_AFDM_NSID, + .len = 0, + .sb = mr1_afdm_id, + }, }; + exec->opcode = CSD_CMD_EXEC; + exec->nsid = 1; + exec->pind = pind; + exec->numr = 2; + exec->cparam1 = cparam1; + exec->cparam2 = cparam2; + exec->group = group_id; + exec->runtime = runtime; + + cmd.addr = (uintptr_t)ranges; + cmd.data_len = sizeof(ranges); + if (submit(fd, &cmd)) { exit(EXIT_FAILURE); } + + return cmd.result; +} + +static uint32_t csd_exec(int fd, uint32_t pind, uint32_t in_afdm_id, + uint32_t out_afdm_id, uint32_t runtime, + uint32_t group_id, uint64_t cparam1, + uint64_t cparam2) +{ + return csd_exec_ranges(fd, pind, out_afdm_id, in_afdm_id, runtime, + group_id, cparam1, cparam2); } static uint32_t csd_create_group(int fd, int8_t prio, uint32_t bandwidth, @@ -398,7 +479,7 @@ static void run_smoke(const char *dev, int fd) NULL, NULL, 0, 1000); csd_admin_activation(admin_fd, csf_id, 1); printf("loaded phantom CSF id=%" PRIu16 "\n", csf_id); - csd_exec(fd, csf_id, id, id, 0, 0, 0); + csd_exec(fd, csf_id, id, id, 0, 0, 0, 
0); csd_admin_activation(admin_fd, csf_id, 0); csd_admin_unload_program(admin_fd, csf_id); close(admin_fd); @@ -443,7 +524,7 @@ static void run_so_smoke(const char *dev, int fd, const char *so_path) so_path, "csd_vadd", 0, 0); csd_admin_activation(admin_fd, csf_id, 1); printf("loaded shared-library CSF id=%" PRIu16 "\n", csf_id); - csd_exec(fd, csf_id, in_id, out_id, 0, 0, COUNT); + csd_exec(fd, csf_id, in_id, out_id, 0, 0, COUNT, 0); csd_read(fd, out_id, 0, output, 4096); for (int i = 0; i < COUNT; i++) { @@ -467,6 +548,163 @@ static void run_so_smoke(const char *dev, int fd, const char *so_path) free(output); } +static void run_original_so_smoke(const char *dev, int fd, const char *so_path) +{ + int admin_fd = open_admin_from_namespace(dev); + uint32_t in_id; + uint32_t out_id; + uint8_t *input = NULL; + uint8_t *output = NULL; + uint32_t pattern_id; + uint8_t *pattern = NULL; + + if (posix_memalign((void **)&input, 4096, 65536) || + posix_memalign((void **)&output, 4096, 65536) || + posix_memalign((void **)&pattern, 4096, 4096)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + + memset(input, 0, 65536); + memset(output, 0, 65536); + memset(pattern, 0, 4096); + + enum { KNN_NODES = 4, KNN_NODE_SIZE = 4160 }; + for (int n = 0; n < KNN_NODES; n++) { + uint8_t *node = input + n * KNN_NODE_SIZE; + + memset(node, 'A' + n, 64); + memset(node + 64, '0' + n, 4096); + } + in_id = csd_alloc(fd, KNN_NODES * KNN_NODE_SIZE); + out_id = csd_alloc(fd, 4096); + csd_write(fd, in_id, 0, input, KNN_NODES * KNN_NODE_SIZE); + csd_admin_load_program(admin_fd, 2, CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_knn", 0, 0); + csd_admin_activation(admin_fd, 2, 1); + csd_exec_ranges(fd, 2, in_id, out_id, 0, 0, 0, 0); + csd_read(fd, out_id, 0, output, 4096); + for (int i = 0; i < KNN_NODES; i++) { + if (((int *)output)[i] < 0) { + fprintf(stderr, "knn smoke invalid distance at %d\n", i); + exit(EXIT_FAILURE); + } + } + csd_admin_activation(admin_fd, 2, 0); + 
csd_admin_unload_program(admin_fd, 2); + csd_dealloc(fd, in_id); + csd_dealloc(fd, out_id); + printf("knn shared-library smoke passed\n"); + + memset(input, 'x', 65536); + memset(output, 0, 65536); + for (int r = 0; r < 8; r++) { + char *record = (char *)input + r * 32; + + memset(record, 'A' + r, 32); + record[30] = '0'; + record[31] = (r % 2) ? ('0' + 55) : ('0' + 70); + } + in_id = csd_alloc(fd, 4096); + out_id = csd_alloc(fd, 4096); + csd_write(fd, in_id, 0, input, 4096); + csd_write(fd, out_id, 0, output, 4096); + csd_admin_load_program(admin_fd, 3, CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_sql", 0, 0); + csd_admin_activation(admin_fd, 3, 1); + if (csd_exec_ranges(fd, 3, in_id, out_id, 0, 0, 50, 60) != 4 * 32) { + fprintf(stderr, "sql smoke unexpected result\n"); + exit(EXIT_FAILURE); + } + csd_admin_activation(admin_fd, 3, 0); + csd_admin_unload_program(admin_fd, 3); + csd_dealloc(fd, in_id); + csd_dealloc(fd, out_id); + printf("sql shared-library smoke passed\n"); + + memset(input, 'Z', 65536); + memcpy(input + 32, "needle", 6); + memcpy(input + 96, "needle", 6); + memcpy(pattern, "needle", 7); + in_id = csd_alloc(fd, 4096); + pattern_id = csd_alloc(fd, 4096); + csd_write(fd, in_id, 0, input, 4096); + csd_write(fd, pattern_id, 0, pattern, 4096); + csd_admin_load_program(admin_fd, 4, CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_grep", 0, 0); + csd_admin_activation(admin_fd, 4, 1); + if (csd_exec_ranges(fd, 4, in_id, pattern_id, 0, 0, 4, 1024) != 16) { + fprintf(stderr, "grep smoke unexpected result\n"); + exit(EXIT_FAILURE); + } + csd_admin_activation(admin_fd, 4, 0); + csd_admin_unload_program(admin_fd, 4); + csd_dealloc(fd, in_id); + csd_dealloc(fd, pattern_id); + printf("grep shared-library smoke passed\n"); + + close(admin_fd); + free(input); + free(output); + free(pattern); +} + +static void run_bench(int fd, uint32_t size, uint32_t iterations) +{ + uint8_t *buf = NULL; + uint8_t *read_buf = NULL; + uint32_t id; + uint64_t start; + uint64_t end; + + if (size 
== 0 || iterations == 0) { + fprintf(stderr, "bench requires non-zero bytes and iterations\n"); + exit(EXIT_FAILURE); + } + if (posix_memalign((void **)&buf, 4096, (size + 4095U) & ~4095U) || + posix_memalign((void **)&read_buf, 4096, (size + 4095U) & ~4095U)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + memset(buf, 0x5a, (size + 4095U) & ~4095U); + memset(read_buf, 0, (size + 4095U) & ~4095U); + + id = csd_alloc(fd, size); + + start = monotonic_ns(); + for (uint32_t i = 0; i < iterations; i++) { + csd_write(fd, id, 0, buf, size); + } + end = monotonic_ns(); + printf("bench afdm_write bytes=%u iterations=%u avg_ns=%" PRIu64 "\n", + size, iterations, (end - start) / iterations); + + start = monotonic_ns(); + for (uint32_t i = 0; i < iterations; i++) { + csd_read(fd, id, 0, read_buf, size); + } + end = monotonic_ns(); + printf("bench afdm_read bytes=%u iterations=%u avg_ns=%" PRIu64 "\n", + size, iterations, (end - start) / iterations); + + if (pwrite(fd, buf, size, 0) != size) { + perror("pwrite nvm"); + exit(EXIT_FAILURE); + } + fsync(fd); + start = monotonic_ns(); + for (uint32_t i = 0; i < iterations; i++) { + csd_nvm_to_afdm(fd, id, 0, 0, (uint16_t)((size + 4095U) / 4096U - 1)); + } + end = monotonic_ns(); + printf("bench nvm_to_afdm bytes=%u iterations=%u avg_ns=%" PRIu64 "\n", + size, iterations, (end - start) / iterations); + + csd_dealloc(fd, id); + free(buf); + free(read_buf); +} + int main(int argc, char **argv) { const char *dev; @@ -494,6 +732,19 @@ int main(int argc, char **argv) return EXIT_FAILURE; } run_so_smoke(dev, fd, argv[3]); + } else if (!strcmp(op, "smoke-so-all")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + run_original_so_smoke(dev, fd, argv[3]); + } else if (!strcmp(op, "bench")) { + if (argc != 5) { + usage(argv[0]); + return EXIT_FAILURE; + } + run_bench(fd, (uint32_t)parse_u64(argv[3], "bytes"), + (uint32_t)parse_u64(argv[4], "iterations")); } else if (!strcmp(op, "alloc")) { uint64_t size; uint32_t 
id; @@ -570,9 +821,10 @@ int main(int argc, char **argv) } else if (!strcmp(op, "exec")) { uint32_t runtime = 0; uint32_t group_id = 0; - uint32_t cparam1 = 0; + uint64_t cparam1 = 0; + uint64_t cparam2 = 0; - if (argc < 6 || argc > 9) { + if (argc < 6 || argc > 10) { usage(argv[0]); return EXIT_FAILURE; } @@ -582,13 +834,16 @@ int main(int argc, char **argv) if (argc >= 8) { group_id = (uint32_t)parse_u64(argv[7], "group-id"); } - if (argc == 9) { - cparam1 = (uint32_t)parse_u64(argv[8], "cparam1"); + if (argc >= 9) { + cparam1 = parse_u64(argv[8], "cparam1"); + } + if (argc == 10) { + cparam2 = parse_u64(argv[9], "cparam2"); } - csd_exec(fd, (uint32_t)parse_u64(argv[3], "csf-id"), + csd_exec(fd, (uint32_t)parse_u64(argv[3], "pind"), (uint32_t)parse_u64(argv[4], "in-afdm-id"), (uint32_t)parse_u64(argv[5], "out-afdm-id"), - runtime, group_id, cparam1); + runtime, group_id, cparam1, cparam2); } else if (!strcmp(op, "create-group")) { uint32_t id; diff --git a/tests/femu-csd/csd-vadd.c b/tests/femu-csd/csd-vadd.c index 0bcc2a63287..322e4805449 100644 --- a/tests/femu-csd/csd-vadd.c +++ b/tests/femu-csd/csd-vadd.c @@ -7,6 +7,13 @@ int64_t csd_vadd(struct femu_csd_args *args) int *in = args->mr_addr[1]; long long count = args->cparam1; + if (count == 0 && args->numr >= 2) { + long long out_count = args->mr_len[0] / (long long)sizeof(*out); + long long in_count = args->mr_len[1] / (2 * (long long)sizeof(*in)); + + count = out_count < in_count ? 
out_count : in_count; + } + if (args->numr < 2 || count < 0) { return -1; } From afdefcc977e63d24199f000c2a2fcf9e72a16044 Mon Sep 17 00:00:00 2001 From: Emilio597 <857005703@qq.com> Date: Tue, 26 May 2026 14:55:57 +0800 Subject: [PATCH 09/10] tests/femu-csd: add optional ubpf and lz4 coverage --- README.md | 5 +- femu-scripts/femu-compile.sh | 6 +- femu-scripts/run-csd.sh | 68 +++++++++++++------- hw/femu/csd/csd.c | 6 +- meson.build | 17 ++++- meson_options.txt | 2 + tests/femu-csd/Makefile | 12 +++- tests/femu-csd/README.md | 26 ++++++-- tests/femu-csd/csd-original-kernels.cc | 21 ++++++ tests/femu-csd/csd-passthru.c | 89 ++++++++++++++++++++++++++ tests/femu-csd/csd-vadd.bpf.c | 25 ++++++++ 11 files changed, 235 insertions(+), 42 deletions(-) create mode 100644 tests/femu-csd/csd-vadd.bpf.c diff --git a/README.md b/README.md index 2ec27b2434a..09e0a7a8323 100644 --- a/README.md +++ b/README.md @@ -385,10 +385,6 @@ nr_thread=4 # Number of functional simulation threads time_slice=200000 # Scheduler time slice (ns) context_switch_time=200 # Context switch time (ns) csf_runtime_scale=3 # Runtime scaling factor -pg_rd_lat=40000 # NAND page read latency (ns), used by NVM-to-AFDM -pg_wr_lat=200000 # NAND page program latency (ns) -blk_er_lat=2000000 # NAND block erase latency (ns) -ch_xfer_lat=0 # Channel transfer latency (ns) ``` **Current Scope:** @@ -399,6 +395,7 @@ ch_xfer_lat=0 # Channel transfer latency (ns) fields (`pind`, `numr`, `dlen`, `cparam1`, `cparam2`, `group`, `runtime`) - CEMU-style admin commands for CSF load/unload and activate/deactivate - Optional uBPF CSF support via `./femu-compile.sh --enable-csd-ubpf` + or `./femu-compile.sh --enable-csd-ubpf=/path/to/ubpf-cemu` - Group/QoS command metadata - Guest-side passthrough tests in `tests/femu-csd/` diff --git a/femu-scripts/femu-compile.sh b/femu-scripts/femu-compile.sh index 73698fb9907..24988632a6a 100755 --- a/femu-scripts/femu-compile.sh +++ b/femu-scripts/femu-compile.sh @@ -8,9 +8,13 @@ for arg 
in "$@"; do --enable-csd-ubpf) FEMU_CONFIGURE_OPTS="${FEMU_CONFIGURE_OPTS} --enable-femu-csd-ubpf" ;; + --enable-csd-ubpf=*) + UBPF_PATH="${arg#*=}" + FEMU_CONFIGURE_OPTS="${FEMU_CONFIGURE_OPTS} --enable-femu-csd-ubpf -Dfemu_csd_ubpf_path=${UBPF_PATH}" + ;; *) echo "Unknown option: $arg" - echo "Usage: $0 [--enable-csd-ubpf]" + echo "Usage: $0 [--enable-csd-ubpf[=/path/to/ubpf-cemu]]" exit 1 ;; esac diff --git a/femu-scripts/run-csd.sh b/femu-scripts/run-csd.sh index 42e69d8d3b3..496c7121e4c 100755 --- a/femu-scripts/run-csd.sh +++ b/femu-scripts/run-csd.sh @@ -6,36 +6,60 @@ IMGDIR=$HOME/images # Virtual machine disk image OSIMGF=$IMGDIR/u20s.qcow2 +# Configurable SSD Controller layout parameters (must be power of 2) +secsz=512 # sector size in bytes +secs_per_pg=8 # number of sectors in a flash page +pgs_per_blk=256 # number of pages per flash block +blks_per_pl=256 # number of blocks per plane +pls_per_lun=1 # keep it at one, no multiplanes support +luns_per_ch=8 # number of chips per channel +nchs=8 # number of channels +ssd_size=4096 # in megabytes + +# Latency in nanoseconds +pg_rd_lat=40000 # page read latency +pg_wr_lat=200000 # page write latency +blk_er_lat=2000000 # block erase latency +ch_xfer_lat=0 # channel transfer time, ignored for now + +# GC Threshold (1-100) +gc_thres_pcent=75 +gc_thres_pcent_high=95 + # FEMU CSD parameters -SSD_SIZE_MB=4096 -FDM_SIZE_MB=64 -NR_CU=4 -NR_THREAD=4 -TIME_SLICE=200000 -CONTEXT_SWITCH_TIME=200 -CSF_RUNTIME_SCALE=3 -PG_RD_LAT=40000 -PG_WR_LAT=200000 -BLK_ER_LAT=2000000 -CH_XFER_LAT=0 +fdm_size=64 +nr_cu=4 +nr_thread=4 +time_slice=200000 +context_switch_time=200 +csf_runtime_scale=3 #----------------------------------------------------------------------- # Compose the entire FEMU CSD command line options FEMU_OPTIONS="-device femu" -FEMU_OPTIONS=${FEMU_OPTIONS}",devsz_mb=${SSD_SIZE_MB}" +FEMU_OPTIONS=${FEMU_OPTIONS}",devsz_mb=${ssd_size}" FEMU_OPTIONS=${FEMU_OPTIONS}",namespaces=1" 
FEMU_OPTIONS=${FEMU_OPTIONS}",femu_mode=4" -FEMU_OPTIONS=${FEMU_OPTIONS}",fdm_size=${FDM_SIZE_MB}" -FEMU_OPTIONS=${FEMU_OPTIONS}",nr_cu=${NR_CU}" -FEMU_OPTIONS=${FEMU_OPTIONS}",nr_thread=${NR_THREAD}" -FEMU_OPTIONS=${FEMU_OPTIONS}",time_slice=${TIME_SLICE}" -FEMU_OPTIONS=${FEMU_OPTIONS}",context_switch_time=${CONTEXT_SWITCH_TIME}" -FEMU_OPTIONS=${FEMU_OPTIONS}",csf_runtime_scale=${CSF_RUNTIME_SCALE}" -FEMU_OPTIONS=${FEMU_OPTIONS}",pg_rd_lat=${PG_RD_LAT}" -FEMU_OPTIONS=${FEMU_OPTIONS}",pg_wr_lat=${PG_WR_LAT}" -FEMU_OPTIONS=${FEMU_OPTIONS}",blk_er_lat=${BLK_ER_LAT}" -FEMU_OPTIONS=${FEMU_OPTIONS}",ch_xfer_lat=${CH_XFER_LAT}" +FEMU_OPTIONS=${FEMU_OPTIONS}",secsz=${secsz}" +FEMU_OPTIONS=${FEMU_OPTIONS}",secs_per_pg=${secs_per_pg}" +FEMU_OPTIONS=${FEMU_OPTIONS}",pgs_per_blk=${pgs_per_blk}" +FEMU_OPTIONS=${FEMU_OPTIONS}",blks_per_pl=${blks_per_pl}" +FEMU_OPTIONS=${FEMU_OPTIONS}",pls_per_lun=${pls_per_lun}" +FEMU_OPTIONS=${FEMU_OPTIONS}",luns_per_ch=${luns_per_ch}" +FEMU_OPTIONS=${FEMU_OPTIONS}",nchs=${nchs}" +FEMU_OPTIONS=${FEMU_OPTIONS}",pg_rd_lat=${pg_rd_lat}" +FEMU_OPTIONS=${FEMU_OPTIONS}",pg_wr_lat=${pg_wr_lat}" +FEMU_OPTIONS=${FEMU_OPTIONS}",blk_er_lat=${blk_er_lat}" +FEMU_OPTIONS=${FEMU_OPTIONS}",ch_xfer_lat=${ch_xfer_lat}" +FEMU_OPTIONS=${FEMU_OPTIONS}",gc_thres_pcent=${gc_thres_pcent}" +FEMU_OPTIONS=${FEMU_OPTIONS}",gc_thres_pcent_high=${gc_thres_pcent_high}" +FEMU_OPTIONS=${FEMU_OPTIONS}",fdm_size=${fdm_size}" +FEMU_OPTIONS=${FEMU_OPTIONS}",nr_cu=${nr_cu}" +FEMU_OPTIONS=${FEMU_OPTIONS}",nr_thread=${nr_thread}" +FEMU_OPTIONS=${FEMU_OPTIONS}",time_slice=${time_slice}" +FEMU_OPTIONS=${FEMU_OPTIONS}",context_switch_time=${context_switch_time}" +FEMU_OPTIONS=${FEMU_OPTIONS}",csf_runtime_scale=${csf_runtime_scale}" echo ${FEMU_OPTIONS} diff --git a/hw/femu/csd/csd.c b/hw/femu/csd/csd.c index dc129137efc..410279f5b1c 100644 --- a/hw/femu/csd/csd.c +++ b/hw/femu/csd/csd.c @@ -373,7 +373,7 @@ static uint16_t csd_load_ubpf(FemuCsdProgram *program, bool jit) return 
NVME_INVALID_FIELD | NVME_DNR; } - if (ubpf_load_elf(program->ubpf_vm, elf, elf_size, symbol, &errmsg) < 0) { + if (ubpf_load_elf_ex(program->ubpf_vm, elf, elf_size, symbol, &errmsg) < 0) { femu_err("CSD: failed to load uBPF ELF %s:%s: %s\n", path, symbol, errmsg ? errmsg : "unknown error"); free(errmsg); @@ -682,11 +682,11 @@ static uint16_t csd_exec(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) break; } if (program->ubpf_jit_fn) { - result = program->ubpf_jit_fn(&args, sizeof(args)); + result = program->ubpf_jit_fn((struct ubpf_jit_args *)&args); } else { uint64_t ubpf_result; - if (ubpf_exec(program->ubpf_vm, &args, sizeof(args), + if (ubpf_exec(program->ubpf_vm, (struct ubpf_jit_args *)&args, &ubpf_result) < 0) { status = NVME_INVALID_FIELD | NVME_DNR; break; diff --git a/meson.build b/meson.build index 50d4efe6df9..356e142e502 100644 --- a/meson.build +++ b/meson.build @@ -2292,8 +2292,21 @@ endif # libbpf bpf_version = '1.1.0' libbpf = dependency('libbpf', version: '>=' + bpf_version, required: get_option('bpf'), method: 'pkg-config') -femu_csd_ubpf = dependency('ubpf', required: get_option('femu_csd_ubpf'), - method: 'pkg-config') +femu_csd_ubpf_path = get_option('femu_csd_ubpf_path') +if femu_csd_ubpf_path != '' + femu_csd_ubpf_lib_path = femu_csd_ubpf_path / 'build/lib/libubpf.a' + femu_csd_ubpf_lib = cc.find_library('ubpf', + dirs: femu_csd_ubpf_path / 'build/lib', + required: get_option('femu_csd_ubpf')) + femu_csd_ubpf = declare_dependency( + include_directories: include_directories(femu_csd_ubpf_path / 'vm/inc', + femu_csd_ubpf_path / 'build/vm'), + dependencies: femu_csd_ubpf_lib) + emulator_link_args += femu_csd_ubpf_lib_path +else + femu_csd_ubpf = dependency('ubpf', required: get_option('femu_csd_ubpf'), + method: 'pkg-config') +endif if libbpf.found() and not cc.links(''' #include #include diff --git a/meson_options.txt b/meson_options.txt index 9d66b8e9d8c..f728738e86a 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -148,6 +148,8 @@ 
option('bpf', type : 'feature', value : 'auto', description: 'eBPF support') option('femu_csd_ubpf', type : 'feature', value : 'disabled', description: 'uBPF runtime support for FEMU CSD') +option('femu_csd_ubpf_path', type : 'string', value : '', + description: 'Path to an external ubpf-cemu build for FEMU CSD') option('cocoa', type : 'feature', value : 'auto', description: 'Cocoa user interface (macOS only)') option('curl', type : 'feature', value : 'auto', diff --git a/tests/femu-csd/Makefile b/tests/femu-csd/Makefile index 8e990b16cec..105d4c6a353 100644 --- a/tests/femu-csd/Makefile +++ b/tests/femu-csd/Makefile @@ -4,12 +4,15 @@ CFLAGS ?= -Wall -Wextra -O2 -g SO_CFLAGS ?= -Wall -Wextra -O2 -g -fPIC CXX_SO_CFLAGS ?= -Wall -Wextra -O2 -g -fPIC +BPF_TARGETS := csd-vadd.bpf.o TARGETS := csd-passthru csd-vadd.so csd-original-kernels.so -.PHONY: all clean +.PHONY: all bpf clean all: $(TARGETS) +bpf: $(BPF_TARGETS) + csd-passthru: csd-passthru.c $(CC) $(CFLAGS) -o $@ $< @@ -17,7 +20,10 @@ csd-vadd.so: csd-vadd.c femu-csd-kernel.h $(CC) $(SO_CFLAGS) -shared -o $@ $< csd-original-kernels.so: csd-original-kernels.cc femu-csd-kernel.h - $(CXX) $(CXX_SO_CFLAGS) -shared -o $@ $< -pthread + $(CXX) $(CXX_SO_CFLAGS) -shared -o $@ $< -pthread -llz4 + +csd-vadd.bpf.o: csd-vadd.bpf.c femu-csd-kernel.h + clang -target bpf -O2 -g -c -o $@ $< clean: - rm -f $(TARGETS) + rm -f $(TARGETS) $(BPF_TARGETS) diff --git a/tests/femu-csd/README.md b/tests/femu-csd/README.md index 8384a0e7c1d..6c70a4d7502 100644 --- a/tests/femu-csd/README.md +++ b/tests/femu-csd/README.md @@ -37,9 +37,9 @@ sudo ./csd-passthru /dev/nvme0n1 smoke-so /home//FEMU/tests/femu-csd/csd-v ``` `make` also builds `csd-original-kernels.so`, which contains small -shared-library ports of the original CEMU `knn`, `sql`, and `grep` kernels. 
-These tests exercise the same CSD program lifecycle and inline memory range -interface as the vadd test: +shared-library ports of the original CEMU `knn`, `sql`, `grep`, and `lz4` +kernels. These tests exercise the same CSD program lifecycle and inline memory +range interface as the vadd test: ```bash sudo ./csd-passthru /dev/nvme0n1 smoke-so-all /home//FEMU/tests/femu-csd/csd-original-kernels.so @@ -102,12 +102,24 @@ requests; the passthrough commands validate the additional computational storage interface. Shared-library CSF support is enabled in the default FEMU build. uBPF support -is optional because it depends on an external `ubpf` library. Build FEMU with: +is optional because it depends on an external `ubpf` library. If `ubpf` is +installed through pkg-config, build FEMU with: ```bash ./femu-compile.sh --enable-csd-ubpf ``` -The original CEMU LZ4 and uBPF tests are not part of the default smoke suite: -LZ4 requires an additional host dependency, and uBPF requires FEMU to be built -with `--enable-csd-ubpf`. +If you use the `ubpf-cemu` source tree directly, pass its path explicitly: + +```bash +./femu-compile.sh --enable-csd-ubpf=/home//CEMU-FEMU/ubpf-cemu +``` + +The guest helper does not build BPF objects by default. 
Build the BPF test +program on the host or in a guest with Clang BPF support: + +```bash +make bpf +sudo ./csd-passthru /dev/nvme0n1 smoke-ubpf /host/path/csd-vadd.bpf.o 0 +sudo ./csd-passthru /dev/nvme0n1 smoke-ubpf /host/path/csd-vadd.bpf.o 1 +``` diff --git a/tests/femu-csd/csd-original-kernels.cc b/tests/femu-csd/csd-original-kernels.cc index 63eb53b3111..fca043565e4 100644 --- a/tests/femu-csd/csd-original-kernels.cc +++ b/tests/femu-csd/csd-original-kernels.cc @@ -3,6 +3,8 @@ #include #include +#include + #include "femu-csd-kernel.h" struct KnnNode { @@ -125,3 +127,22 @@ extern "C" long long csd_grep(struct femu_csd_args *args) return grep_rows(data, rows, cols, pattern); } + +extern "C" long long csd_lz4(struct femu_csd_args *args) +{ + if (args->numr < 2) { + return -1; + } + + const char *input = static_cast(args->mr_addr[0]); + char *output = static_cast(args->mr_addr[1]); + long long input_size = args->mr_len[0]; + long long output_size = args->mr_len[1]; + int max_compressed_size = LZ4_compressBound(input_size); + + if (max_compressed_size <= 0 || output_size < max_compressed_size) { + return -1; + } + + return LZ4_compress_default(input, output, input_size, output_size); +} diff --git a/tests/femu-csd/csd-passthru.c b/tests/femu-csd/csd-passthru.c index 9140604d262..cd2e5d3b30a 100644 --- a/tests/femu-csd/csd-passthru.c +++ b/tests/femu-csd/csd-passthru.c @@ -75,6 +75,7 @@ static void usage(const char *prog) " %s /dev/nvmeXnY exec [runtime-ns] [group-id] [cparam1] [cparam2]\n" " %s /dev/nvmeXnY smoke-so \n" " %s /dev/nvmeXnY smoke-so-all \n" + " %s /dev/nvmeXnY smoke-ubpf [jit:0|1]\n" " %s /dev/nvmeXnY bench \n" " %s /dev/nvmeX admin-load-so [runtime-ns]\n" " %s /dev/nvmeX admin-load-ubpf [jit:0|1] [runtime-ns]\n" @@ -89,6 +90,7 @@ static void usage(const char *prog) " %s /dev/nvmeXnY read \n" " %s /dev/nvmeXnY nvm-to-afdm \n", prog, prog, prog, prog, prog, prog, prog, prog, prog, prog, prog, + prog, prog, prog, prog, prog, prog, prog, prog, prog); } 
@@ -548,6 +550,63 @@ static void run_so_smoke(const char *dev, int fd, const char *so_path)
     free(output);
 }
 
+static void run_ubpf_smoke(const char *dev, int fd, const char *elf_path,
+                           uint8_t jit)
+{
+    enum { COUNT = 1024 };
+    int *input = NULL;
+    int *output = NULL;
+    uint32_t in_id;
+    uint32_t out_id;
+    uint16_t csf_id = 5;
+    int admin_fd;
+
+    if (posix_memalign((void **)&input, 4096, 8192) ||
+        posix_memalign((void **)&output, 4096, 4096)) {
+        perror("posix_memalign");
+        exit(EXIT_FAILURE);
+    }
+
+    for (int i = 0; i < COUNT; i++) {
+        input[i * 2] = i;
+        input[i * 2 + 1] = i * 3;
+        output[i] = 0;
+    }
+
+    in_id = csd_alloc(fd, 8192);
+    out_id = csd_alloc(fd, 4096);
+    csd_write(fd, in_id, 0, input, 8192);
+    csd_write(fd, out_id, 0, output, 4096);
+
+    admin_fd = open_admin_from_namespace(dev);
+    csd_admin_load_program(admin_fd, csf_id, CSD_CSF_TYPE_EBPF,
+                           elf_path, "csd_vadd_bpf", jit, 0);
+    csd_admin_activation(admin_fd, csf_id, 1);
+    printf("loaded uBPF CSF id=%" PRIu16 " jit=%u\n", csf_id, jit);
+    csd_exec(fd, csf_id, in_id, out_id, 0, 0, COUNT, 0);
+    csd_read(fd, out_id, 0, output, 4096);
+
+    for (int i = 0; i < COUNT; i++) {
+        int expected = i + i * 3;
+
+        if (output[i] != expected) {
+            fprintf(stderr, "uBPF smoke mismatch at %d: got %d expected %d\n",
+                    i, output[i], expected);
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    csd_dealloc(fd, in_id);
+    csd_dealloc(fd, out_id);
+    csd_admin_activation(admin_fd, csf_id, 0);
+    csd_admin_unload_program(admin_fd, csf_id);
+    close(admin_fd);
+    printf("uBPF smoke passed\n");
+
+    free(input);
+    free(output);
+}
+
 static void run_original_so_smoke(const char *dev, int fd, const char *so_path)
 {
     int admin_fd = open_admin_from_namespace(dev);
@@ -643,6 +702,31 @@ static void run_original_so_smoke(const char *dev, int fd, const char *so_path)
     csd_dealloc(fd, pattern_id);
     printf("grep shared-library smoke passed\n");
 
+    memset(input, 'L', 4096);
+    memset(output, 0, 65536);
+    in_id = csd_alloc(fd, 4096);
+    out_id = csd_alloc(fd, 8192);
+    csd_write(fd, in_id, 0, input, 4096);
+    csd_write(fd, out_id, 0, output, 8192);
+    csd_admin_load_program(admin_fd, 6, CSD_CSF_TYPE_SHARED_LIB,
+                           so_path, "csd_lz4", 0, 0);
+    csd_admin_activation(admin_fd, 6, 1);
+    /*
+     * A valid result is the compressed length, which cannot exceed the
+     * 8192-byte output AFDM.  Testing only for 0 would let any other
+     * out-of-range value (e.g. a truncated error code) count as a pass.
+     */
+    uint32_t lz4_len = csd_exec_ranges(fd, 6, in_id, out_id, 0, 0, 0, 0);
+    if (lz4_len == 0 || lz4_len > 8192) {
+        fprintf(stderr, "lz4 smoke unexpected result\n");
+        exit(EXIT_FAILURE);
+    }
+    csd_admin_activation(admin_fd, 6, 0);
+    csd_admin_unload_program(admin_fd, 6);
+    csd_dealloc(fd, in_id);
+    csd_dealloc(fd, out_id);
+    printf("lz4 shared-library smoke passed\n");
+
     close(admin_fd);
     free(input);
     free(output);
@@ -738,6 +816,18 @@ int main(int argc, char **argv)
             return EXIT_FAILURE;
         }
         run_original_so_smoke(dev, fd, argv[3]);
+    } else if (!strcmp(op, "smoke-ubpf")) {
+        uint8_t jit = 0;
+
+        if (argc < 4 || argc > 5) {
+            usage(argv[0]);
+            return EXIT_FAILURE;
+        }
+        if (argc == 5) {
+            /* Test before narrowing: 256 must not truncate to 0. */
+            jit = parse_u64(argv[4], "jit") != 0;
+        }
+        run_ubpf_smoke(dev, fd, argv[3], jit ? 1 : 0);
     } else if (!strcmp(op, "bench")) {
         if (argc != 5) {
             usage(argv[0]);
diff --git a/tests/femu-csd/csd-vadd.bpf.c b/tests/femu-csd/csd-vadd.bpf.c
new file mode 100644
index 00000000000..6e29bdcfad2
--- /dev/null
+++ b/tests/femu-csd/csd-vadd.bpf.c
@@ -0,0 +1,45 @@
+#include "femu-csd-kernel.h"
+
+/*
+ * uBPF vector-add CSF: out[i] = in[2 * i] + in[2 * i + 1] for i < cparam1.
+ * Memory range 0 is the output, memory range 1 the interleaved input.
+ * Returns the number of sums written, or -1 when fewer than two ranges are
+ * supplied or cparam1 is not a positive count that fits both ranges.
+ */
+long long csd_vadd_bpf(struct femu_csd_args *args)
+{
+    int *in;
+    int *out;
+    long long count;
+    unsigned long long out_cap;
+    unsigned long long in_cap;
+
+    if (args->numr < 2) {
+        return -1;
+    }
+
+    in = args->mr_addr[1];
+    out = args->mr_addr[0];
+    count = args->cparam1;
+    if (count <= 0) {
+        return -1;
+    }
+
+    /*
+     * cparam1 is caller-controlled, so bound it by the range lengths
+     * (treated as byte counts, as in csd-vadd.c).  Without this check
+     * the loop below runs past the end of both buffers.
+     */
+    out_cap = (unsigned long long)args->mr_len[0] / sizeof(*out);
+    in_cap = (unsigned long long)args->mr_len[1] / (2 * sizeof(*in));
+    if ((unsigned long long)count > out_cap ||
+        (unsigned long long)count > in_cap) {
+        return -1;
+    }
+
+    for (long long i = 0; i < count; i++) {
+        out[i] = in[i * 2] + in[i * 2 + 1];
+    }
+
+    return count;
+}
From 838e53ed0fc64099b7b5917c9cccba66003678bd Mon Sep 17 00:00:00 2001
From: Emilio597 <857005703@qq.com>
Date: Tue, 26 May 2026 16:21:39 +0800
Subject: [PATCH 10/10] hw/femu: add CSD MRS and passthrough examples

---
 hw/femu/csd/csd.c | 148 +++++++++-
 hw/femu/csd/csd.h | 18 ++
 hw/femu/nvme-admin.c | 4 +-
 hw/femu/nvme.h | 1 +
 tests/femu-csd/README.md | 31 +++
 tests/femu-csd/csd-passthru.c
| 511 +++++++++++++++++++++++++++++++++- tests/femu-csd/csd-vadd.c | 36 +++ 7 files changed, 734 insertions(+), 15 deletions(-) diff --git a/hw/femu/csd/csd.c b/hw/femu/csd/csd.c index 410279f5b1c..e2674b28218 100644 --- a/hw/femu/csd/csd.c +++ b/hw/femu/csd/csd.c @@ -47,15 +47,23 @@ typedef struct FemuCsdGroup { uint32_t deadline; } FemuCsdGroup; +typedef struct FemuCsdMrs { + uint16_t rsid; + uint32_t numr; + NvmeCsdMemoryRange *ranges; +} FemuCsdMrs; + typedef struct FemuCsdState { CsdCtrlParams params; uint64_t fdm_capacity; uint64_t fdm_used; uint32_t next_afdm_id; uint32_t next_group_id; + uint32_t next_rsid; GHashTable *afdms; GHashTable *programs; GHashTable *groups; + GHashTable *mrs; QemuMutex lock; } FemuCsdState; @@ -73,6 +81,7 @@ static void csd_check_size(void) QEMU_BUILD_BUG_ON(sizeof(NvmeCsdDeleteGroupCmd) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeCsdLoadProgramCmd) != 64); QEMU_BUILD_BUG_ON(sizeof(NvmeCsdProgramActivationCmd) != 64); + QEMU_BUILD_BUG_ON(sizeof(NvmeCsdMrsMgmtCmd) != 64); } static FemuCsdState *csd_state(FemuCtrl *n) @@ -122,6 +131,18 @@ static void csd_program_free(gpointer opaque) g_free(program); } +static void csd_mrs_free(gpointer opaque) +{ + FemuCsdMrs *mrs = opaque; + + if (!mrs) { + return; + } + + g_free(mrs->ranges); + g_free(mrs); +} + static void csd_init_ctrl_str(FemuCtrl *n) { static int csd_id; @@ -175,12 +196,15 @@ static void csd_init(FemuCtrl *n, Error **errp) csd->fdm_capacity = n->csd_params.fdm_size_mb * MiB; csd->next_afdm_id = 1; csd->next_group_id = 1; + csd->next_rsid = 1; csd->afdms = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, csd_afdm_free); csd->programs = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, csd_program_free); csd->groups = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, g_free); + csd->mrs = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, + csd_mrs_free); qemu_mutex_init(&csd->lock); n->ext_ops.state = csd; @@ -201,6 +225,7 @@ static void 
csd_exit(FemuCtrl *n)
     g_hash_table_destroy(csd->afdms);
     g_hash_table_destroy(csd->programs);
     g_hash_table_destroy(csd->groups);
+    g_hash_table_destroy(csd->mrs);
     qemu_mutex_destroy(&csd->lock);
     g_free(csd);
     n->ext_ops.state = NULL;
@@ -233,6 +258,15 @@ static FemuCsdGroup *csd_get_group_locked(FemuCsdState *csd, uint32_t id)
     return g_hash_table_lookup(csd->groups, GUINT_TO_POINTER(id));
 }
 
+static FemuCsdMrs *csd_get_mrs_locked(FemuCsdState *csd, uint32_t id)
+{
+    if (id == 0) {
+        return NULL;
+    }
+
+    return g_hash_table_lookup(csd->mrs, GUINT_TO_POINTER(id));
+}
+
 static uint16_t csd_check_afdm_range(FemuCsdAfdm *afdm, uint64_t offset,
                                      uint64_t size)
 {
@@ -530,6 +564,97 @@ static uint16_t csd_compute_activate(FemuCtrl *n, NvmeCmd *cmd)
     return NVME_SUCCESS;
 }
 
+/*
+ * Memory Range Set management, dispatched for NVME_ADM_CMD_CSD_MRS_MGMT.
+ *
+ * sel == 0 creates a set from numr NvmeCsdMemoryRange descriptors fetched
+ * through dma_write_prp() and returns the new rsid in the CQE result;
+ * sel == 1 deletes the set named by rsid.  Any other selector, or an
+ * invalid rsid/numr combination, fails with NVME_INVALID_FIELD.
+ */
+static uint16_t csd_mrs_mgmt(FemuCtrl *n, NvmeCmd *cmd, NvmeCqe *cqe)
+{
+    FemuCsdState *csd = csd_state(n);
+    NvmeCsdMrsMgmtCmd *manage = (NvmeCsdMrsMgmtCmd *)cmd;
+    uint16_t rsid = le16_to_cpu(manage->rsid);
+    uint32_t sel = manage->sel;
+    uint32_t numr = manage->numr;
+    uint64_t prp1 = le64_to_cpu(manage->prp1);
+    uint64_t prp2 = le64_to_cpu(manage->prp2);
+    NvmeCsdMemoryRange *ranges = NULL;
+    FemuCsdMrs *mrs;
+    uint32_t id;
+    uint16_t status = NVME_SUCCESS;
+
+    switch (sel) {
+    case 0:
+        if (rsid != 0 || numr == 0 ||
+            numr > CSD_EXEC_DATA_MAX / sizeof(NvmeCsdMemoryRange)) {
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+
+        ranges = g_new0(NvmeCsdMemoryRange, numr);
+        status = dma_write_prp(n, (uint8_t *)ranges,
+                               numr * sizeof(*ranges), prp1, prp2);
+        if (status) {
+            g_free(ranges);
+            return status;
+        }
+
+        qemu_mutex_lock(&csd->lock);
+        /*
+         * rsid is a 16-bit command field and 0 is rejected as an id, so only
+         * 1..UINT16_MAX are usable.  Refuse to create more sets than that:
+         * it keeps the search below finite and bounds how much host memory
+         * MRS creation can consume.
+         */
+        if (g_hash_table_size(csd->mrs) >= UINT16_MAX) {
+            qemu_mutex_unlock(&csd->lock);
+            g_free(ranges);
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+        /*
+         * Wrap the 32-bit cursor inside the 16-bit id space.  An id above
+         * UINT16_MAX could never be named by a later command, so the set
+         * would stay allocated until csd_exit() destroys the table.
+         */
+        do {
+            if (csd->next_rsid == 0 || csd->next_rsid > UINT16_MAX) {
+                csd->next_rsid = 1;
+            }
+            id = csd->next_rsid++;
+        } while (g_hash_table_contains(csd->mrs, GUINT_TO_POINTER(id)));
+
+        mrs = g_new0(FemuCsdMrs, 1);
+        mrs->rsid = id;
+        mrs->numr = numr;
+        mrs->ranges = ranges;
+        g_hash_table_insert(csd->mrs, GUINT_TO_POINTER(id), mrs);
+        qemu_mutex_unlock(&csd->lock);
+
+        cqe->n.result = id;
+        return NVME_SUCCESS;
+
+    case 1:
+        if (rsid == 0) {
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+
+        qemu_mutex_lock(&csd->lock);
+        if (!csd_get_mrs_locked(csd, rsid)) {
+            qemu_mutex_unlock(&csd->lock);
+            return NVME_INVALID_FIELD | NVME_DNR;
+        }
+        g_hash_table_remove(csd->mrs, GUINT_TO_POINTER((uint32_t)rsid));
+        qemu_mutex_unlock(&csd->lock);
+        cqe->n.result = 0;
+        return NVME_SUCCESS;
+
+    default:
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+}
+
 static uint16_t csd_build_exec_args_locked(FemuCsdState *csd,
                                            NvmeCsdMemoryRange *ranges,
                                            uint32_t numr,
@@ -595,6 +702,7 @@ static uint16_t csd_exec(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
     uint64_t prp1 = le64_to_cpu(exec->prp1);
     uint64_t prp2 = le64_to_cpu(exec->prp2);
     FemuCsdProgram *program;
+    FemuCsdMrs *mrs = NULL;
     uint64_t copy_size;
     uint8_t *data = NULL;
     NvmeCsdMemoryRange *ranges = NULL;
@@ -608,21 +716,26 @@ static uint16_t csd_exec(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         dlen = numr * sizeof(NvmeCsdMemoryRange);
     }
 
-    if (pind == 0 || rsid != 0 || numr == 0 ||
+    if (pind == 0 || (rsid == 0 && numr == 0) ||
         numr > CSD_EXEC_DATA_MAX / sizeof(NvmeCsdMemoryRange)) {
         return NVME_INVALID_FIELD | NVME_DNR;
     }
+    if (rsid != 0 && numr != 0) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
     if (dlen < numr * sizeof(NvmeCsdMemoryRange) || dlen > CSD_EXEC_DATA_MAX) {
         return NVME_INVALID_FIELD | NVME_DNR;
     }
 
-    data = g_malloc0(dlen);
-    status = dma_write_prp(n, data, dlen, prp1, prp2);
-    if (status) {
-        g_free(data);
-        return status;
+    if (numr) {
+        data = g_malloc0(dlen);
+        status = dma_write_prp(n, data, dlen, prp1, prp2);
+        if (status) {
+            g_free(data);
+            return status;
+        }
+        ranges = (NvmeCsdMemoryRange *)data;
     }
-    ranges = (NvmeCsdMemoryRange *)data;
 
     qemu_mutex_lock(&csd->lock);
     program = csd_get_program_locked(csd, pind);
@@ -647,6 +760,18 @@ static uint16_t csd_exec(FemuCtrl *n, NvmeCmd *cmd,
NvmeRequest *req) runtime = program->runtime; } + if (rsid) { + mrs = csd_get_mrs_locked(csd, rsid); + if (!mrs) { + qemu_mutex_unlock(&csd->lock); + status = NVME_INVALID_FIELD | NVME_DNR; + goto out; + } + ranges = mrs->ranges; + numr = mrs->numr; + dlen = 0; + } + status = csd_build_exec_args_locked(csd, ranges, numr, &args, &mr_addr, &mr_len); if (status) { @@ -655,7 +780,7 @@ static uint16_t csd_exec(FemuCtrl *n, NvmeCmd *cmd, NvmeRequest *req) } args.cparam1 = cparam1; args.cparam2 = cparam2; - args.data_buffer = dlen > numr * sizeof(NvmeCsdMemoryRange) ? + args.data_buffer = data && dlen > numr * sizeof(NvmeCsdMemoryRange) ? data + numr * sizeof(NvmeCsdMemoryRange) : NULL; args.buffer_len = args.data_buffer ? dlen - numr * sizeof(NvmeCsdMemoryRange) : 0; @@ -1010,9 +1135,11 @@ static uint16_t csd_io_cmd(FemuCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, } } -static uint16_t csd_admin_cmd(FemuCtrl *n, NvmeCmd *cmd) +static uint16_t csd_admin_cmd(FemuCtrl *n, NvmeCmd *cmd, NvmeCqe *cqe) { switch (cmd->opcode) { + case NVME_ADM_CMD_CSD_MRS_MGMT: + return csd_mrs_mgmt(n, cmd, cqe); case NVME_ADM_CMD_CSD_COMPUTE_LOAD: case NVME_ADM_CMD_CSD_COMPUTE_LOAD_DATA: return csd_compute_load(n, cmd); @@ -1031,7 +1158,8 @@ int nvme_register_csd(FemuCtrl *n) .exit = csd_exit, .rw_check_req = NULL, .start_ctrl = NULL, - .admin_cmd = csd_admin_cmd, + .admin_cmd = NULL, + .admin_cmd_cqe = csd_admin_cmd, .io_cmd = csd_io_cmd, .get_log = NULL, }; diff --git a/hw/femu/csd/csd.h b/hw/femu/csd/csd.h index 5272f811ee7..91a17c21ecf 100644 --- a/hw/femu/csd/csd.h +++ b/hw/femu/csd/csd.h @@ -16,6 +16,7 @@ enum FemuCsdIoCommands { }; enum FemuCsdAdminCommands { + NVME_ADM_CMD_CSD_MRS_MGMT = 0x21, NVME_ADM_CMD_CSD_COMPUTE_LOAD = 0x22, NVME_ADM_CMD_CSD_COMPUTE_ACTIVATE = 0x23, NVME_ADM_CMD_CSD_COMPUTE_LOAD_DATA = 0x25, @@ -223,4 +224,21 @@ typedef struct QEMU_PACKED NvmeCsdDeleteGroupCmd { uint32_t rsvd11[5]; } NvmeCsdDeleteGroupCmd; +typedef struct QEMU_PACKED NvmeCsdMrsMgmtCmd { + uint8_t 
opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint32_t rsvd[4]; + uint64_t prp1; + uint64_t prp2; + uint16_t sel:4; + uint16_t rsvd10:12; + uint16_t rsid; + uint8_t numr; + uint8_t rsvd11a; + uint16_t rsvd11b; + uint32_t rsvd12[4]; +} NvmeCsdMrsMgmtCmd; + #endif diff --git a/hw/femu/nvme-admin.c b/hw/femu/nvme-admin.c index 769c7fcf9d4..5ca5ba0e4a0 100644 --- a/hw/femu/nvme-admin.c +++ b/hw/femu/nvme-admin.c @@ -1407,6 +1407,9 @@ static uint16_t nvme_admin_cmd(FemuCtrl *n, NvmeCmd *cmd, NvmeCqe *cqe) case NVME_ADM_CMD_SECURITY_RECV: return NVME_INVALID_OPCODE | NVME_DNR; default: + if (n->ext_ops.admin_cmd_cqe) { + return n->ext_ops.admin_cmd_cqe(n, cmd, cqe); + } if (n->ext_ops.admin_cmd) { return n->ext_ops.admin_cmd(n, cmd); } @@ -1454,4 +1457,3 @@ void nvme_process_sq_admin(void *opaque) nvme_isr_notify_admin(cq); } } - diff --git a/hw/femu/nvme.h b/hw/femu/nvme.h index c8ff6fc4466..87603a54a12 100644 --- a/hw/femu/nvme.h +++ b/hw/femu/nvme.h @@ -1559,6 +1559,7 @@ typedef struct FemuExtCtrlOps { uint16_t (*rw_check_req)(struct FemuCtrl *, NvmeCmd *, NvmeRequest *); int (*start_ctrl)(struct FemuCtrl *); uint16_t (*admin_cmd)(struct FemuCtrl *, NvmeCmd *); + uint16_t (*admin_cmd_cqe)(struct FemuCtrl *, NvmeCmd *, NvmeCqe *); uint16_t (*io_cmd)(struct FemuCtrl *, NvmeNamespace *, NvmeCmd *, NvmeRequest *); uint16_t (*get_log)(struct FemuCtrl *, NvmeCmd *); } FemuExtCtrlOps; diff --git a/tests/femu-csd/README.md b/tests/femu-csd/README.md index 6c70a4d7502..c075d7832e7 100644 --- a/tests/femu-csd/README.md +++ b/tests/femu-csd/README.md @@ -45,6 +45,35 @@ range interface as the vadd test: sudo ./csd-passthru /dev/nvme0n1 smoke-so-all /home//FEMU/tests/femu-csd/csd-original-kernels.so ``` +FDMFS-free MRS is available through the original CEMU memory range set +management command layout (`0x21`). 
The passthrough helper creates an MRS from +AFDM-backed memory range descriptors and executes a CSF by `rsid`: + +```bash +sudo ./csd-passthru /dev/nvme0n1 smoke-mrs /home//FEMU/tests/femu-csd/csd-vadd.so +sudo ./csd-passthru /dev/nvme0n1 vadd-example /home//FEMU/tests/femu-csd/csd-vadd.so +``` + +The migrated sync-breakdown check measures NVM-to-AFDM copy, CSF execution, and +AFDM read as separate stages: + +```bash +sudo ./csd-passthru /dev/nvme0n1 sync-breakdown /home//FEMU/tests/femu-csd/csd-vadd.so 4096 16 +``` + +The indirect vadd smoke keeps the original indirect CSF ABI shape and uses an +AFDM-backed MRS instead of FDMFS files: + +```bash +sudo ./csd-passthru /dev/nvme0n1 indirect-vadd /home//FEMU/tests/femu-csd/csd-vadd.so +``` + +A compact benchmark entry covers vadd plus the original kernel smoke set: + +```bash +sudo ./csd-passthru /dev/nvme0n1 benchmark-kernels /home//FEMU/tests/femu-csd/csd-vadd.so /home//FEMU/tests/femu-csd/csd-original-kernels.so 1 +``` + The shared-library CSF ABI is: ```c @@ -88,6 +117,8 @@ sudo ./csd-passthru /dev/nvme0 admin-load-ubpf 1 /host/path/csf.bpf.o csf_symbol sudo ./csd-passthru /dev/nvme0 admin-activate 1 sudo ./csd-passthru /dev/nvme0 admin-deactivate 1 sudo ./csd-passthru /dev/nvme0 admin-unload 1 +sudo ./csd-passthru /dev/nvme0 admin-create-mrs +sudo ./csd-passthru /dev/nvme0 admin-delete-mrs ``` The tool assumes FEMU was started with CSD mode enabled, for example: diff --git a/tests/femu-csd/csd-passthru.c b/tests/femu-csd/csd-passthru.c index cd2e5d3b30a..6653f0610cc 100644 --- a/tests/femu-csd/csd-passthru.c +++ b/tests/femu-csd/csd-passthru.c @@ -15,6 +15,7 @@ #include enum { + CSD_ADM_MRS_MGMT = 0x21, CSD_ADM_COMPUTE_LOAD = 0x22, CSD_ADM_COMPUTE_LOAD_DATA = 0x25, CSD_ADM_COMPUTE_ACTIVATE = 0x23, @@ -33,6 +34,8 @@ enum { CSD_CSF_TYPE_PHANTOM = 0, CSD_CSF_TYPE_EBPF = 1, CSD_CSF_TYPE_SHARED_LIB = 3, + CSD_LOAD_FLAG_JIT = 1U << 0, + CSD_LOAD_FLAG_INDIRECT = 1U << 1, }; enum { @@ -65,6 +68,23 @@ struct 
csd_program_execute_cmd { uint32_t runtime; } __attribute__((packed)); +struct csd_mrs_mgmt_cmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint32_t rsvd[4]; + uint64_t prp1; + uint64_t prp2; + uint16_t sel:4; + uint16_t rsvd10:12; + uint16_t rsid; + uint8_t numr; + uint8_t rsvd11a; + uint16_t rsvd11b; + uint32_t rsvd12[4]; +} __attribute__((packed)); + static void usage(const char *prog) { fprintf(stderr, @@ -76,6 +96,11 @@ static void usage(const char *prog) " %s /dev/nvmeXnY smoke-so \n" " %s /dev/nvmeXnY smoke-so-all \n" " %s /dev/nvmeXnY smoke-ubpf [jit:0|1]\n" + " %s /dev/nvmeXnY smoke-mrs \n" + " %s /dev/nvmeXnY vadd-example \n" + " %s /dev/nvmeXnY sync-breakdown \n" + " %s /dev/nvmeXnY indirect-vadd \n" + " %s /dev/nvmeXnY benchmark-kernels \n" " %s /dev/nvmeXnY bench \n" " %s /dev/nvmeX admin-load-so [runtime-ns]\n" " %s /dev/nvmeX admin-load-ubpf [jit:0|1] [runtime-ns]\n" @@ -86,12 +111,14 @@ static void usage(const char *prog) " %s /dev/nvmeXnY create-group \n" " %s /dev/nvmeXnY set-qos \n" " %s /dev/nvmeXnY delete-group \n" + " %s /dev/nvmeX admin-create-mrs \n" + " %s /dev/nvmeX admin-delete-mrs \n" " %s /dev/nvmeXnY write \n" " %s /dev/nvmeXnY read \n" " %s /dev/nvmeXnY nvm-to-afdm \n", prog, prog, prog, prog, prog, prog, prog, prog, prog, prog, prog, - prog, - prog, prog, prog, prog, prog, prog, prog, prog); + prog, prog, prog, prog, prog, prog, + prog, prog, prog, prog, prog, prog, prog, prog, prog, prog); } static uint64_t monotonic_ns(void) @@ -199,7 +226,7 @@ static void csd_admin_load_program(int fd, uint16_t pind, uint8_t type, .data_len = size, .cdw2 = ((uint32_t)flags & 0x1), .cdw3 = runtime, - .cdw10 = cdw10, + .cdw10 = cdw10 | ((flags & CSD_LOAD_FLAG_INDIRECT) ? 
(1U << 28) : 0), .cdw11 = (uint32_t)size, .cdw14 = (uint32_t)size, }; @@ -249,6 +276,41 @@ static void csd_admin_activation(int fd, uint16_t pind, uint8_t sel) } } +static uint16_t csd_admin_create_mrs(int fd, const struct csd_memory_range *ranges, + uint8_t numr) +{ + struct nvme_passthru_cmd cmd = { 0 }; + struct csd_mrs_mgmt_cmd *mrs = (struct csd_mrs_mgmt_cmd *)&cmd; + + mrs->opcode = CSD_ADM_MRS_MGMT; + mrs->nsid = 1; + mrs->sel = 0; + mrs->numr = numr; + cmd.addr = (uintptr_t)ranges; + cmd.data_len = numr * sizeof(*ranges); + + if (submit_admin(fd, &cmd)) { + exit(EXIT_FAILURE); + } + + return (uint16_t)cmd.result; +} + +static void csd_admin_delete_mrs(int fd, uint16_t rsid) +{ + struct nvme_passthru_cmd cmd = { 0 }; + struct csd_mrs_mgmt_cmd *mrs = (struct csd_mrs_mgmt_cmd *)&cmd; + + mrs->opcode = CSD_ADM_MRS_MGMT; + mrs->nsid = 1; + mrs->sel = 1; + mrs->rsid = rsid; + + if (submit_admin(fd, &cmd)) { + exit(EXIT_FAILURE); + } +} + static uint32_t csd_exec_ranges(int fd, uint32_t pind, uint32_t mr0_afdm_id, uint32_t mr1_afdm_id, uint32_t runtime, uint32_t group_id, uint64_t cparam1, @@ -289,6 +351,30 @@ static uint32_t csd_exec_ranges(int fd, uint32_t pind, uint32_t mr0_afdm_id, return cmd.result; } +static uint32_t csd_exec_mrs(int fd, uint32_t pind, uint16_t rsid, + uint32_t runtime, uint32_t group_id, + uint64_t cparam1, uint64_t cparam2) +{ + struct nvme_passthru_cmd cmd = { 0 }; + struct csd_program_execute_cmd *exec = + (struct csd_program_execute_cmd *)&cmd; + + exec->opcode = CSD_CMD_EXEC; + exec->nsid = 1; + exec->pind = pind; + exec->rsid = rsid; + exec->cparam1 = cparam1; + exec->cparam2 = cparam2; + exec->group = group_id; + exec->runtime = runtime; + + if (submit(fd, &cmd)) { + exit(EXIT_FAILURE); + } + + return cmd.result; +} + static uint32_t csd_exec(int fd, uint32_t pind, uint32_t in_afdm_id, uint32_t out_afdm_id, uint32_t runtime, uint32_t group_id, uint64_t cparam1, @@ -607,6 +693,368 @@ static void run_ubpf_smoke(const char *dev, int fd, 
const char *elf_path, free(output); } +static void run_mrs_smoke(const char *dev, int fd, const char *so_path) +{ + enum { COUNT = 1024 }; + int *input = NULL; + int *output = NULL; + uint32_t in_id; + uint32_t out_id; + uint16_t rsid; + uint16_t csf_id = 7; + int admin_fd; + struct csd_memory_range ranges[2]; + + if (posix_memalign((void **)&input, 4096, 8192) || + posix_memalign((void **)&output, 4096, 4096)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < COUNT; i++) { + input[i * 2] = i; + input[i * 2 + 1] = i * 4; + output[i] = 0; + } + + in_id = csd_alloc(fd, 8192); + out_id = csd_alloc(fd, 4096); + csd_write(fd, in_id, 0, input, 8192); + csd_write(fd, out_id, 0, output, 4096); + + memset(ranges, 0, sizeof(ranges)); + ranges[0].nsid = CSD_MR_AFDM_NSID; + ranges[0].len = 0; + ranges[0].sb = out_id; + ranges[1].nsid = CSD_MR_AFDM_NSID; + ranges[1].len = 0; + ranges[1].sb = in_id; + + admin_fd = open_admin_from_namespace(dev); + rsid = csd_admin_create_mrs(admin_fd, ranges, 2); + csd_admin_load_program(admin_fd, csf_id, CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_vadd", 0, 0); + csd_admin_activation(admin_fd, csf_id, 1); + printf("created MRS rsid=%" PRIu16 "\n", rsid); + csd_exec_mrs(fd, csf_id, rsid, 0, 0, COUNT, 0); + csd_read(fd, out_id, 0, output, 4096); + + for (int i = 0; i < COUNT; i++) { + int expected = i + i * 4; + + if (output[i] != expected) { + fprintf(stderr, "MRS smoke mismatch at %d: got %d expected %d\n", + i, output[i], expected); + exit(EXIT_FAILURE); + } + } + + csd_admin_activation(admin_fd, csf_id, 0); + csd_admin_unload_program(admin_fd, csf_id); + csd_admin_delete_mrs(admin_fd, rsid); + close(admin_fd); + csd_dealloc(fd, in_id); + csd_dealloc(fd, out_id); + printf("MRS shared-library smoke passed\n"); + + free(input); + free(output); +} + +static void run_vadd_example(const char *dev, int fd, const char *so_path) +{ + enum { COUNT = 1024 }; + int *input = NULL; + int *output = NULL; + uint32_t in_id; + 
uint32_t out_id; + uint16_t rsid; + uint16_t csf_id = 8; + int admin_fd; + struct csd_memory_range ranges[2]; + + if (posix_memalign((void **)&input, 4096, 8192) || + posix_memalign((void **)&output, 4096, 4096)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < COUNT; i++) { + input[i * 2] = i * 2; + input[i * 2 + 1] = i * 2 + 1; + output[i] = 0; + } + + in_id = csd_alloc(fd, 8192); + out_id = csd_alloc(fd, 4096); + csd_write(fd, in_id, 0, input, 8192); + csd_write(fd, out_id, 0, output, 4096); + + memset(ranges, 0, sizeof(ranges)); + ranges[0].nsid = CSD_MR_AFDM_NSID; + ranges[0].sb = out_id; + ranges[1].nsid = CSD_MR_AFDM_NSID; + ranges[1].sb = in_id; + + admin_fd = open_admin_from_namespace(dev); + rsid = csd_admin_create_mrs(admin_fd, ranges, 2); + csd_admin_load_program(admin_fd, csf_id, CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_vadd", 0, 0); + csd_admin_activation(admin_fd, csf_id, 1); + csd_exec_mrs(fd, csf_id, rsid, 0, 0, COUNT, 0); + csd_read(fd, out_id, 0, output, 4096); + + for (int i = 0; i < COUNT; i++) { + int expected = input[i * 2] + input[i * 2 + 1]; + + if (output[i] != expected) { + fprintf(stderr, "vadd example mismatch at %d: got %d expected %d\n", + i, output[i], expected); + exit(EXIT_FAILURE); + } + } + + csd_admin_activation(admin_fd, csf_id, 0); + csd_admin_unload_program(admin_fd, csf_id); + csd_admin_delete_mrs(admin_fd, rsid); + close(admin_fd); + csd_dealloc(fd, in_id); + csd_dealloc(fd, out_id); + printf("vadd example passed\n"); + + free(input); + free(output); +} + +static void run_sync_breakdown(const char *dev, int fd, const char *so_path, + uint32_t bytes, uint32_t iterations) +{ + int *input = NULL; + int *output = NULL; + uint32_t in_id; + uint32_t out_id; + uint16_t rsid; + uint16_t csf_id = 9; + int admin_fd; + struct csd_memory_range ranges[2]; + uint64_t copy_time = 0; + uint64_t exec_time = 0; + uint64_t read_time = 0; + uint64_t start; + uint64_t end; + uint32_t count; + uint32_t in_bytes; + 
+ if (bytes == 0 || iterations == 0 || bytes % sizeof(int)) { + fprintf(stderr, "sync-breakdown requires non-zero int-aligned bytes and iterations\n"); + exit(EXIT_FAILURE); + } + count = bytes / sizeof(int); + in_bytes = bytes * 2; + + if (posix_memalign((void **)&input, 4096, (in_bytes + 4095U) & ~4095U) || + posix_memalign((void **)&output, 4096, (bytes + 4095U) & ~4095U)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + + for (uint32_t i = 0; i < count; i++) { + input[i * 2] = i; + input[i * 2 + 1] = i + 1; + output[i] = 0; + } + + in_id = csd_alloc(fd, in_bytes); + out_id = csd_alloc(fd, bytes); + csd_write(fd, out_id, 0, output, bytes); + + memset(ranges, 0, sizeof(ranges)); + ranges[0].nsid = CSD_MR_AFDM_NSID; + ranges[0].sb = out_id; + ranges[1].nsid = CSD_MR_AFDM_NSID; + ranges[1].sb = in_id; + + admin_fd = open_admin_from_namespace(dev); + rsid = csd_admin_create_mrs(admin_fd, ranges, 2); + csd_admin_load_program(admin_fd, csf_id, CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_vadd", 0, 0); + csd_admin_activation(admin_fd, csf_id, 1); + + if (pwrite(fd, input, in_bytes, 0) != (ssize_t)in_bytes) { + perror("pwrite nvm"); + exit(EXIT_FAILURE); + } + fsync(fd); + + for (uint32_t i = 0; i < iterations; i++) { + start = monotonic_ns(); + csd_nvm_to_afdm(fd, in_id, 0, 0, + (uint16_t)((in_bytes + 511U) / 512U - 1)); + end = monotonic_ns(); + copy_time += end - start; + + start = monotonic_ns(); + csd_exec_mrs(fd, csf_id, rsid, 0, 0, count, 0); + end = monotonic_ns(); + exec_time += end - start; + + start = monotonic_ns(); + csd_read(fd, out_id, 0, output, bytes); + end = monotonic_ns(); + read_time += end - start; + } + + for (uint32_t i = 0; i < count; i++) { + int expected = input[i * 2] + input[i * 2 + 1]; + + if (output[i] != expected) { + fprintf(stderr, "sync breakdown mismatch at %u: got %d expected %d\n", + i, output[i], expected); + exit(EXIT_FAILURE); + } + } + + printf("breakdown nvm_to_afdm bytes=%u iterations=%u avg_ns=%" PRIu64 "\n", + in_bytes, 
iterations, copy_time / iterations); + printf("breakdown exec bytes=%u iterations=%u avg_ns=%" PRIu64 "\n", + bytes, iterations, exec_time / iterations); + printf("breakdown afdm_read bytes=%u iterations=%u avg_ns=%" PRIu64 "\n", + bytes, iterations, read_time / iterations); + + csd_admin_activation(admin_fd, csf_id, 0); + csd_admin_unload_program(admin_fd, csf_id); + csd_admin_delete_mrs(admin_fd, rsid); + close(admin_fd); + csd_dealloc(fd, in_id); + csd_dealloc(fd, out_id); + + free(input); + free(output); +} + +static void run_so_smoke(const char *dev, int fd, const char *so_path); +static void run_original_so_smoke(const char *dev, int fd, const char *so_path); + +static void run_indirect_vadd(const char *dev, int fd, const char *so_path) +{ + enum { COUNT = 1024 }; + int *input = NULL; + int *output = NULL; + int *global_mem = NULL; + uint32_t in_id; + uint32_t out_id; + uint32_t global_id; + uint16_t rsid; + uint16_t csf_id = 10; + int admin_fd; + struct csd_memory_range ranges[3]; + uint8_t task_info[16 + sizeof(int)] = { 0 }; + + if (posix_memalign((void **)&input, 4096, 8192) || + posix_memalign((void **)&output, 4096, 4096) || + posix_memalign((void **)&global_mem, 4096, 4096)) { + perror("posix_memalign"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < COUNT; i++) { + input[i * 2] = i; + input[i * 2 + 1] = i + 7; + output[i] = 0; + } + global_mem[0] = 0; + global_mem[1] = 0; + + in_id = csd_alloc(fd, 8192); + out_id = csd_alloc(fd, 4096); + global_id = csd_alloc(fd, 4096); + csd_write(fd, in_id, 0, input, 8192); + csd_write(fd, out_id, 0, output, 4096); + csd_write(fd, global_id, 0, global_mem, 4096); + + memset(ranges, 0, sizeof(ranges)); + ranges[0].nsid = CSD_MR_AFDM_NSID; + ranges[0].sb = out_id; + ranges[1].nsid = CSD_MR_AFDM_NSID; + ranges[1].sb = in_id; + ranges[2].nsid = CSD_MR_AFDM_NSID; + ranges[2].sb = global_id; + + /* + * Original CEMU indirect execute starts with: + * nr_concurrent_chunks, destination, nr_total_input_cf2, 
nr_total_output_cf2. + * This FDMFS-free smoke uses pre-filled AFDM ranges, so copy-format lists + * are intentionally empty while the indirect CSF ABI is still exercised. + */ + ((int *)task_info)[0] = 1; + ((int *)task_info)[1] = 0; + ((int *)task_info)[2] = 0; + ((int *)task_info)[3] = 0; + + admin_fd = open_admin_from_namespace(dev); + rsid = csd_admin_create_mrs(admin_fd, ranges, 3); + csd_admin_load_program(admin_fd, csf_id, CSD_CSF_TYPE_SHARED_LIB, + so_path, "csd_vadd_indirect", + CSD_LOAD_FLAG_INDIRECT, 0); + csd_admin_activation(admin_fd, csf_id, 1); + if (csd_exec_mrs(fd, csf_id, rsid, 0, 0, COUNT, 0) == 0) { + fprintf(stderr, "indirect vadd returned zero blocks\n"); + exit(EXIT_FAILURE); + } + csd_read(fd, out_id, 0, output, 4096); + + for (int i = 0; i < COUNT; i++) { + int expected = input[i * 2] + input[i * 2 + 1]; + + if (output[i] != expected) { + fprintf(stderr, "indirect vadd mismatch at %d: got %d expected %d\n", + i, output[i], expected); + exit(EXIT_FAILURE); + } + } + + csd_admin_activation(admin_fd, csf_id, 0); + csd_admin_unload_program(admin_fd, csf_id); + csd_admin_delete_mrs(admin_fd, rsid); + close(admin_fd); + csd_dealloc(fd, in_id); + csd_dealloc(fd, out_id); + csd_dealloc(fd, global_id); + printf("indirect vadd smoke passed\n"); + + free(input); + free(output); + free(global_mem); +} + +static void run_benchmark_kernels(const char *dev, int fd, const char *vadd_so, + const char *kernels_so, uint32_t iterations) +{ + uint64_t start; + uint64_t end; + + if (iterations == 0) { + fprintf(stderr, "benchmark-kernels requires non-zero iterations\n"); + exit(EXIT_FAILURE); + } + + start = monotonic_ns(); + for (uint32_t i = 0; i < iterations; i++) { + run_so_smoke(dev, fd, vadd_so); + } + end = monotonic_ns(); + printf("benchmark-kernel name=vadd iterations=%u avg_ns=%" PRIu64 "\n", + iterations, (end - start) / iterations); + + start = monotonic_ns(); + for (uint32_t i = 0; i < iterations; i++) { + run_original_so_smoke(dev, fd, 
kernels_so); + } + end = monotonic_ns(); + printf("benchmark-kernel name=knn_sql_grep_lz4 iterations=%u avg_ns=%" PRIu64 "\n", + iterations, (end - start) / iterations); +} + static void run_original_so_smoke(const char *dev, int fd, const char *so_path) { int admin_fd = open_admin_from_namespace(dev); @@ -772,7 +1220,7 @@ static void run_bench(int fd, uint32_t size, uint32_t iterations) fsync(fd); start = monotonic_ns(); for (uint32_t i = 0; i < iterations; i++) { - csd_nvm_to_afdm(fd, id, 0, 0, (uint16_t)((size + 4095U) / 4096U - 1)); + csd_nvm_to_afdm(fd, id, 0, 0, (uint16_t)((size + 511U) / 512U - 1)); } end = monotonic_ns(); printf("bench nvm_to_afdm bytes=%u iterations=%u avg_ns=%" PRIu64 "\n", @@ -827,6 +1275,39 @@ int main(int argc, char **argv) jit = (uint8_t)parse_u64(argv[4], "jit"); } run_ubpf_smoke(dev, fd, argv[3], jit ? 1 : 0); + } else if (!strcmp(op, "smoke-mrs")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + run_mrs_smoke(dev, fd, argv[3]); + } else if (!strcmp(op, "vadd-example")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + run_vadd_example(dev, fd, argv[3]); + } else if (!strcmp(op, "sync-breakdown")) { + if (argc != 6) { + usage(argv[0]); + return EXIT_FAILURE; + } + run_sync_breakdown(dev, fd, argv[3], + (uint32_t)parse_u64(argv[4], "bytes"), + (uint32_t)parse_u64(argv[5], "iterations")); + } else if (!strcmp(op, "indirect-vadd")) { + if (argc != 4) { + usage(argv[0]); + return EXIT_FAILURE; + } + run_indirect_vadd(dev, fd, argv[3]); + } else if (!strcmp(op, "benchmark-kernels")) { + if (argc != 6) { + usage(argv[0]); + return EXIT_FAILURE; + } + run_benchmark_kernels(dev, fd, argv[3], argv[4], + (uint32_t)parse_u64(argv[5], "iterations")); } else if (!strcmp(op, "bench")) { if (argc != 5) { usage(argv[0]); @@ -907,6 +1388,28 @@ int main(int argc, char **argv) return EXIT_FAILURE; } csd_admin_unload_program(fd, (uint16_t)parse_u64(argv[3], "pind")); + } else if (!strcmp(op, "admin-create-mrs")) { + 
struct csd_memory_range ranges[2];
+        uint16_t rsid;
+
+        if (argc != 5) {
+            usage(argv[0]);
+            return EXIT_FAILURE;
+        }
+
+        memset(ranges, 0, sizeof(ranges));
+        ranges[0].nsid = CSD_MR_AFDM_NSID;
+        ranges[0].sb = (uint32_t)parse_u64(argv[3], "out-afdm-id");
+        ranges[1].nsid = CSD_MR_AFDM_NSID;
+        ranges[1].sb = (uint32_t)parse_u64(argv[4], "in-afdm-id");
+        rsid = csd_admin_create_mrs(fd, ranges, 2);
+        printf("%" PRIu16 "\n", rsid);
+    } else if (!strcmp(op, "admin-delete-mrs")) {
+        if (argc != 4) {
+            usage(argv[0]);
+            return EXIT_FAILURE;
+        }
+        csd_admin_delete_mrs(fd, (uint16_t)parse_u64(argv[3], "rsid"));
     } else if (!strcmp(op, "exec")) {
         uint32_t runtime = 0;
         uint32_t group_id = 0;
diff --git a/tests/femu-csd/csd-vadd.c b/tests/femu-csd/csd-vadd.c
index 322e4805449..4d8f4efbc57 100644
--- a/tests/femu-csd/csd-vadd.c
+++ b/tests/femu-csd/csd-vadd.c
@@ -1,4 +1,5 @@
 #include 
+#include 
 #include "femu-csd-kernel.h"
 
 int64_t csd_vadd(struct femu_csd_args *args)
@@ -24,3 +25,68 @@ int64_t csd_vadd(struct femu_csd_args *args)
 
     return count;
 }
+
+/*
+ * Indirect-ABI vector add: appends cparam1 pairwise sums of the input range
+ * (mr_addr[1]) to the output range (mr_addr[0]).
+ *
+ * The third range (mr_addr[2]) carries two ints of state across calls:
+ * [0] is the next write position in the output range, counted in ints, and
+ * [1] is that position rounded down to a whole number of 512-byte blocks.
+ *
+ * Returns the number of whole 512-byte blocks below the write position, or
+ * -1 when fewer than three ranges are supplied, cparam1 is negative, or the
+ * carried state is inconsistent.
+ */
+int64_t csd_vadd_indirect(struct femu_csd_args *args)
+{
+    const int ints_per_block = 512 / (int)sizeof(int);
+    int *output;
+    int *input;
+    int *global_mem;
+    long long count = args->cparam1;
+    int pos;
+    int start_loc;
+
+    if (args->numr < 3 || count < 0) {
+        return -1;
+    }
+
+    output = args->mr_addr[0];
+    input = args->mr_addr[1];
+    global_mem = args->mr_addr[2];
+    pos = global_mem[0];
+    start_loc = global_mem[1];
+
+    /* A negative position would index before the start of the output range. */
+    if (pos < 0) {
+        return -1;
+    }
+
+    if (start_loc > 0 && pos > 0) {
+        /*
+         * start_loc > pos would make the length below negative, which wraps
+         * to a huge size_t once multiplied by sizeof(int).
+         */
+        if (start_loc > pos) {
+            return -1;
+        }
+
+        /*
+         * NOTE(review): the carried-over tail is copied from the input range;
+         * confirm this should not be output + start_loc.
+         */
+        memmove(output, input + start_loc,
+                (pos - start_loc) * sizeof(int));
+        pos -= start_loc;
+    }
+
+    for (long long i = 0; i < count; i++) {
+        output[pos++] = input[i * 2] + input[i * 2 + 1];
+    }
+
+    global_mem[1] = (pos / ints_per_block) * ints_per_block;
+    global_mem[0] = pos;
+
+    return global_mem[1] / ints_per_block;
+}