summary | refs | log | tree | commit | diff | stats
path: root/hw/nvme/nvme.h
diff options
context:
space:
mode:
author: Daniel Baumann <mail@daniel-baumann.ch> 2025-06-06 10:05:23 +0000
committer: Daniel Baumann <mail@daniel-baumann.ch> 2025-06-06 10:05:23 +0000
commit: 755cc582a2473d06f3a2131d506d0311cc70e9f9 (patch)
tree: 3efb1ddb8d57bbb4539ac0d229b384871c57820f /hw/nvme/nvme.h
parent: Initial commit. (diff)
download: qemu-upstream.tar.xz
download: qemu-upstream.zip
Adding upstream version 1:7.2+dfsg. (refs: upstream/1%7.2+dfsg, upstream)
Signed-off-by: Daniel Baumann <mail@daniel-baumann.ch>
Diffstat (limited to 'hw/nvme/nvme.h')
-rw-r--r-- hw/nvme/nvme.h 587
1 files changed, 587 insertions, 0 deletions
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
new file mode 100644
index 00000000..7adf042e
--- /dev/null
+++ b/hw/nvme/nvme.h
@@ -0,0 +1,587 @@
+/*
+ * QEMU NVM Express
+ *
+ * Copyright (c) 2012 Intel Corporation
+ * Copyright (c) 2021 Minwoo Im
+ * Copyright (c) 2021 Samsung Electronics Co., Ltd.
+ *
+ * Authors:
+ * Keith Busch <kbusch@kernel.org>
+ * Klaus Jensen <k.jensen@samsung.com>
+ * Gollu Appalanaidu <anaidu.gollu@samsung.com>
+ * Dmitry Fomichev <dmitry.fomichev@wdc.com>
+ * Minwoo Im <minwoo.im.dev@gmail.com>
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ */
+
+#ifndef HW_NVME_NVME_H
+#define HW_NVME_NVME_H
+
+#include "qemu/uuid.h"
+#include "hw/pci/pci.h"
+#include "hw/block/block.h"
+
+#include "block/nvme.h"
+
+#define NVME_MAX_CONTROLLERS 256 /* number of entries in NvmeSubsystem.ctrls[] */
+#define NVME_MAX_NAMESPACES 256 /* largest NSID accepted by nvme_ns() and nvme_subsys_ns() */
+#define NVME_EUI64_DEFAULT ((uint64_t)0x5254000000000000)
+
+QEMU_BUILD_BUG_ON(NVME_MAX_NAMESPACES > NVME_NSID_BROADCAST - 1); /* build-time guard: the limit must not exceed NVME_NSID_BROADCAST - 1 */
+
+typedef struct NvmeCtrl NvmeCtrl; /* forward declaration; the struct is defined further down */
+typedef struct NvmeNamespace NvmeNamespace; /* forward declaration; the struct is defined further down */
+
+#define TYPE_NVME_BUS "nvme-bus" /* type name string for the bus */
+OBJECT_DECLARE_SIMPLE_TYPE(NvmeBus, NVME_BUS)
+
+typedef struct NvmeBus { /* embedded as .bus in both NvmeSubsystem and NvmeCtrl */
+ BusState parent_bus; /* base bus object; sole member */
+} NvmeBus;
+
+#define TYPE_NVME_SUBSYS "nvme-subsys" /* type name string for the subsystem device */
+#define NVME_SUBSYS(obj) \
+ OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
+#define SUBSYS_SLOT_RSVD ((void *)0xFFFF) /* marker compared against ctrls[] entries; parenthesized so it expands as one operand */
+
+typedef struct NvmeSubsystem { /* state of one TYPE_NVME_SUBSYS device */
+ DeviceState parent_obj;
+ NvmeBus bus;
+ uint8_t subnqn[256];
+ char *serial;
+
+ NvmeCtrl *ctrls[NVME_MAX_CONTROLLERS]; /* indexed by cntlid; an entry may equal SUBSYS_SLOT_RSVD */
+ NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; /* indexed by NSID; the +1 makes index NVME_MAX_NAMESPACES valid */
+
+ struct {
+ char *nqn; /* presumably the user-supplied NQN property - confirm in the device code */
+ } params;
+} NvmeSubsystem;
+
+int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp); /* declared only; failure details presumably reported through errp - confirm */
+void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n); /* declared only; implemented outside this header */
+
+static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
+                                         uint32_t cntlid)
+{
+    NvmeCtrl *slot;
+
+    /* No subsystem, or an identifier past the end of the controller table. */
+    if (subsys == NULL || cntlid >= NVME_MAX_CONTROLLERS) {
+        return NULL;
+    }
+
+    slot = subsys->ctrls[cntlid];
+    return slot == SUBSYS_SLOT_RSVD ? NULL : slot; /* marker entries read as NULL */
+}
+
+static inline NvmeNamespace *nvme_subsys_ns(NvmeSubsystem *subsys,
+                                            uint32_t nsid)
+{
+    /* Only NSIDs 1..NVME_MAX_NAMESPACES of an existing subsystem are looked up. */
+    if (subsys != NULL && nsid != 0 && nsid <= NVME_MAX_NAMESPACES) {
+        return subsys->namespaces[nsid];
+    }
+    return NULL;
+}
+
+#define TYPE_NVME_NS "nvme-ns" /* type name string for a namespace device */
+#define NVME_NS(obj) \
+ OBJECT_CHECK(NvmeNamespace, (obj), TYPE_NVME_NS) /* obj viewed as NvmeNamespace via OBJECT_CHECK */
+
+typedef struct NvmeZone {
+ NvmeZoneDescr d; /* zone descriptor; d.zs, d.zslba and d.zcap are read by the inline helpers in this header */
+ uint64_t w_ptr; /* presumably the current write pointer - not used within this header */
+ QTAILQ_ENTRY(NvmeZone) entry; /* linkage for the NvmeZone lists held in NvmeNamespace */
+} NvmeZone;
+
+typedef struct NvmeNamespaceParams { /* embedded in NvmeNamespace as .params */
+ bool detached;
+ bool shared;
+ uint32_t nsid; /* value returned by nvme_nsid() */
+ QemuUUID uuid;
+ uint64_t eui64;
+ bool eui64_default;
+
+ uint16_t ms; /* ms..pif: presumably metadata and protection-information settings - confirm */
+ uint8_t mset;
+ uint8_t pi;
+ uint8_t pil;
+ uint8_t pif;
+
+ uint16_t mssrl; /* mssrl/mcl/msrc: presumably copy-command limits - confirm */
+ uint32_t mcl;
+ uint8_t msrc;
+
+ bool zoned;
+ bool cross_zone_read;
+ uint64_t zone_size_bs;
+ uint64_t zone_cap_bs;
+ uint32_t max_active_zones; /* 0 makes nvme_aor_inc_active()/nvme_aor_dec_active() leave the counter alone */
+ uint32_t max_open_zones; /* 0 makes nvme_aor_inc_open()/nvme_aor_dec_open() leave the counter alone */
+ uint32_t zd_extension_size; /* per-zone stride used by nvme_get_zd_extension() */
+
+ uint32_t numzrwa;
+ uint64_t zrwas;
+ uint64_t zrwafg;
+} NvmeNamespaceParams;
+
+typedef struct NvmeNamespace { /* state of one TYPE_NVME_NS device */
+ DeviceState parent_obj;
+ BlockConf blkconf;
+ int32_t bootindex;
+ int64_t size;
+ int64_t moff; /* base offset that nvme_moff() adds the per-LBA metadata offset to */
+ NvmeIdNs id_ns; /* id_ns.flbas is tested by nvme_ns_ext() */
+ NvmeIdNsNvm id_ns_nvm;
+ NvmeLBAF lbaf; /* lbaf.ds is the shift in nvme_l2b(); lbaf.ms the multiplier in nvme_m2b() */
+ unsigned int nlbaf;
+ size_t lbasz;
+ const uint32_t *iocs;
+ uint8_t csi;
+ uint16_t status;
+ int attached;
+ uint8_t pif;
+
+ struct {
+ uint16_t zrwas;
+ uint16_t zrwafg;
+ uint32_t numzrwa;
+ } zns;
+
+ QTAILQ_ENTRY(NvmeNamespace) entry;
+
+ NvmeIdNsZoned *id_ns_zoned;
+ NvmeZone *zone_array;
+ QTAILQ_HEAD(, NvmeZone) exp_open_zones; /* four NvmeZone lists, linked through NvmeZone.entry */
+ QTAILQ_HEAD(, NvmeZone) imp_open_zones;
+ QTAILQ_HEAD(, NvmeZone) closed_zones;
+ QTAILQ_HEAD(, NvmeZone) full_zones;
+ uint32_t num_zones;
+ uint64_t zone_size; /* added to d.zslba by nvme_zone_rd_boundary() */
+ uint64_t zone_capacity;
+ uint32_t zone_size_log2;
+ uint8_t *zd_extensions; /* indexed by nvme_get_zd_extension() in steps of params.zd_extension_size */
+ int32_t nr_open_zones; /* maintained by nvme_aor_inc_open()/nvme_aor_dec_open() */
+ int32_t nr_active_zones; /* maintained by nvme_aor_inc_active()/nvme_aor_dec_active() */
+
+ NvmeNamespaceParams params;
+
+ struct {
+ uint32_t err_rec;
+ } features;
+} NvmeNamespace;
+
+static inline uint32_t nvme_nsid(NvmeNamespace *ns)
+{
+    /* A NULL namespace is reported as NSID 0. */
+    if (ns == NULL) {
+        return 0;
+    }
+    return ns->params.nsid;
+}
+
+static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba) /* scales an LBA value by the namespace's data shift */
+{
+ return lba << ns->lbaf.ds; /* shift by lbaf.ds (presumably log2 of the block size); result is converted to size_t */
+}
+
+static inline size_t nvme_m2b(NvmeNamespace *ns, uint64_t lba) /* scales an LBA value by lbaf.ms */
+{
+ return ns->lbaf.ms * lba; /* lbaf.ms is presumably the per-block metadata size in bytes - confirm */
+}
+
+static inline int64_t nvme_moff(NvmeNamespace *ns, uint64_t lba) /* ns->moff plus the nvme_m2b() offset for lba */
+{
+ return ns->moff + nvme_m2b(ns, lba); /* int64_t base plus a size_t offset */
+}
+
+static inline bool nvme_ns_ext(NvmeNamespace *ns) /* true when NVME_ID_NS_FLBAS_EXTENDED() is non-zero for id_ns.flbas */
+{
+ return !!NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas); /* !! collapses the macro result to 0 or 1 */
+}
+
+static inline NvmeZoneState nvme_get_zone_state(NvmeZone *zone) /* decodes the zone state out of d.zs */
+{
+ return zone->d.zs >> 4; /* the state sits above the low four bits of d.zs */
+}
+
+static inline void nvme_set_zone_state(NvmeZone *zone, NvmeZoneState state) /* stores state into d.zs */
+{
+ zone->d.zs = state << 4; /* overwrites the whole field, so the low four bits become zero */
+}
+
+static inline uint64_t nvme_zone_rd_boundary(NvmeNamespace *ns, NvmeZone *zone) /* zone start LBA plus the namespace zone size */
+{
+ return zone->d.zslba + ns->zone_size; /* uses zone_size, unlike nvme_zone_wr_boundary() which uses d.zcap */
+}
+
+static inline uint64_t nvme_zone_wr_boundary(NvmeZone *zone) /* zone start LBA plus the zone's own capacity */
+{
+ return zone->d.zslba + zone->d.zcap; /* both operands come from the zone descriptor */
+}
+
+static inline bool nvme_wp_is_valid(NvmeZone *zone)
+{
+    const uint8_t zs = nvme_get_zone_state(zone);
+    /* Full, read-only and offline are the three states that yield false. */
+    return !(zs == NVME_ZONE_STATE_FULL ||
+             zs == NVME_ZONE_STATE_READ_ONLY ||
+             zs == NVME_ZONE_STATE_OFFLINE);
+}
+
+static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
+                                             uint32_t zone_idx)
+{ /* Start of zone_idx's slice of ns->zd_extensions; slices are params.zd_extension_size bytes apart. */
+    return &ns->zd_extensions[(size_t)zone_idx * ns->params.zd_extension_size]; /* widened to size_t before multiplying so the product is not truncated to 32 bits */
+}
+
+static inline void nvme_aor_inc_open(NvmeNamespace *ns) /* accounts for one more open zone */
+{
+ assert(ns->nr_open_zones >= 0);
+ if (ns->params.max_open_zones) { /* the counter only moves when a non-zero limit is set */
+ ns->nr_open_zones++;
+ assert(ns->nr_open_zones <= ns->params.max_open_zones); /* the limit must not be exceeded */
+ }
+}
+
+static inline void nvme_aor_dec_open(NvmeNamespace *ns) /* accounts for one fewer open zone */
+{
+ if (ns->params.max_open_zones) { /* the counter only moves when a non-zero limit is set */
+ assert(ns->nr_open_zones > 0); /* there must be something to release */
+ ns->nr_open_zones--;
+ }
+ assert(ns->nr_open_zones >= 0);
+}
+
+static inline void nvme_aor_inc_active(NvmeNamespace *ns) /* accounts for one more active zone */
+{
+ assert(ns->nr_active_zones >= 0);
+ if (ns->params.max_active_zones) { /* the counter only moves when a non-zero limit is set */
+ ns->nr_active_zones++;
+ assert(ns->nr_active_zones <= ns->params.max_active_zones); /* the limit must not be exceeded */
+ }
+}
+
+static inline void nvme_aor_dec_active(NvmeNamespace *ns) /* accounts for one fewer active zone */
+{
+ if (ns->params.max_active_zones) { /* the counter only moves when a non-zero limit is set */
+ assert(ns->nr_active_zones > 0); /* there must be something to release */
+ ns->nr_active_zones--;
+ assert(ns->nr_active_zones >= ns->nr_open_zones); /* active count may never drop below the open count */
+ }
+ assert(ns->nr_active_zones >= 0);
+}
+
+void nvme_ns_init_format(NvmeNamespace *ns); /* namespace helpers: declared only, implemented outside this header */
+int nvme_ns_setup(NvmeNamespace *ns, Error **errp); /* failure details presumably reported through errp - confirm */
+void nvme_ns_drain(NvmeNamespace *ns);
+void nvme_ns_shutdown(NvmeNamespace *ns);
+void nvme_ns_cleanup(NvmeNamespace *ns);
+
+typedef struct NvmeAsyncEvent { /* element type of NvmeCtrl.aer_queue */
+ QTAILQ_ENTRY(NvmeAsyncEvent) entry; /* list linkage */
+ NvmeAerResult result;
+} NvmeAsyncEvent;
+
+enum { /* single-bit values; presumably combined in NvmeSg.flags - confirm */
+ NVME_SG_ALLOC = 1 << 0,
+ NVME_SG_DMA = 1 << 1,
+};
+
+typedef struct NvmeSg { /* holds either a QEMUSGList or a QEMUIOVector */
+ int flags; /* presumably NVME_SG_* bits; which union member is live likely depends on them - confirm */
+
+ union { /* anonymous: members are accessed directly as sg.qsg / sg.iov */
+ QEMUSGList qsg;
+ QEMUIOVector iov;
+ };
+} NvmeSg;
+
+typedef enum NvmeTxDirection { /* direction argument of nvme_bounce_data() and nvme_bounce_mdata() */
+ NVME_TX_DIRECTION_TO_DEVICE = 0,
+ NVME_TX_DIRECTION_FROM_DEVICE = 1,
+} NvmeTxDirection;
+
+typedef struct NvmeRequest { /* one command; reaches its controller and CQ through sq (see nvme_ctrl(), nvme_cq()) */
+ struct NvmeSQueue *sq; /* dereferenced without a NULL check by nvme_cq() and nvme_ctrl() */
+ struct NvmeNamespace *ns;
+ BlockAIOCB *aiocb;
+ uint16_t status;
+ void *opaque;
+ NvmeCqe cqe; /* cqe.cid is what nvme_cid() returns after le16_to_cpu() */
+ NvmeCmd cmd;
+ BlockAcctCookie acct;
+ NvmeSg sg;
+ QTAILQ_ENTRY(NvmeRequest)entry; /* linkage for the NvmeRequest lists in NvmeSQueue and NvmeCQueue */
+} NvmeRequest;
+
+typedef struct NvmeBounceContext {
+ NvmeRequest *req;
+
+ struct { /* the same pair of fields is instantiated twice: once as data, once as mdata */
+ QEMUIOVector iov;
+ uint8_t *bounce; /* presumably the bounce buffer backing iov - confirm */
+ } data, mdata;
+} NvmeBounceContext;
+
+static inline const char *nvme_adm_opc_str(uint8_t opc) /* maps an admin opcode to a string literal naming its constant */
+{
+ switch (opc) {
+ case NVME_ADM_CMD_DELETE_SQ: return "NVME_ADM_CMD_DELETE_SQ";
+ case NVME_ADM_CMD_CREATE_SQ: return "NVME_ADM_CMD_CREATE_SQ";
+ case NVME_ADM_CMD_GET_LOG_PAGE: return "NVME_ADM_CMD_GET_LOG_PAGE";
+ case NVME_ADM_CMD_DELETE_CQ: return "NVME_ADM_CMD_DELETE_CQ";
+ case NVME_ADM_CMD_CREATE_CQ: return "NVME_ADM_CMD_CREATE_CQ";
+ case NVME_ADM_CMD_IDENTIFY: return "NVME_ADM_CMD_IDENTIFY";
+ case NVME_ADM_CMD_ABORT: return "NVME_ADM_CMD_ABORT";
+ case NVME_ADM_CMD_SET_FEATURES: return "NVME_ADM_CMD_SET_FEATURES";
+ case NVME_ADM_CMD_GET_FEATURES: return "NVME_ADM_CMD_GET_FEATURES";
+ case NVME_ADM_CMD_ASYNC_EV_REQ: return "NVME_ADM_CMD_ASYNC_EV_REQ";
+ case NVME_ADM_CMD_NS_ATTACHMENT: return "NVME_ADM_CMD_NS_ATTACHMENT";
+ case NVME_ADM_CMD_VIRT_MNGMT: return "NVME_ADM_CMD_VIRT_MNGMT";
+ case NVME_ADM_CMD_DBBUF_CONFIG: return "NVME_ADM_CMD_DBBUF_CONFIG";
+ case NVME_ADM_CMD_FORMAT_NVM: return "NVME_ADM_CMD_FORMAT_NVM";
+ default: return "NVME_ADM_CMD_UNKNOWN"; /* any opcode not listed above; never returns NULL */
+ }
+}
+
+static inline const char *nvme_io_opc_str(uint8_t opc) /* maps an I/O opcode to a string literal */
+{
+ switch (opc) { /* note: the strings use NVME_NVM_CMD_ / NVME_ZONED_CMD_ prefixes, not the NVME_CMD_ constant names */
+ case NVME_CMD_FLUSH: return "NVME_NVM_CMD_FLUSH";
+ case NVME_CMD_WRITE: return "NVME_NVM_CMD_WRITE";
+ case NVME_CMD_READ: return "NVME_NVM_CMD_READ";
+ case NVME_CMD_COMPARE: return "NVME_NVM_CMD_COMPARE";
+ case NVME_CMD_WRITE_ZEROES: return "NVME_NVM_CMD_WRITE_ZEROES";
+ case NVME_CMD_DSM: return "NVME_NVM_CMD_DSM";
+ case NVME_CMD_VERIFY: return "NVME_NVM_CMD_VERIFY";
+ case NVME_CMD_COPY: return "NVME_NVM_CMD_COPY";
+ case NVME_CMD_ZONE_MGMT_SEND: return "NVME_ZONED_CMD_MGMT_SEND";
+ case NVME_CMD_ZONE_MGMT_RECV: return "NVME_ZONED_CMD_MGMT_RECV";
+ case NVME_CMD_ZONE_APPEND: return "NVME_ZONED_CMD_ZONE_APPEND";
+ default: return "NVME_NVM_CMD_UNKNOWN"; /* any opcode not listed above; never returns NULL */
+ }
+}
+
+typedef struct NvmeSQueue { /* submission queue state */
+ struct NvmeCtrl *ctrl; /* owning controller; returned by nvme_ctrl() */
+ uint16_t sqid;
+ uint16_t cqid; /* index into ctrl->cq[] used by nvme_cq() */
+ uint32_t head;
+ uint32_t tail;
+ uint32_t size;
+ uint64_t dma_addr;
+ uint64_t db_addr; /* db_addr/ei_addr: presumably shadow-doorbell and event-index addresses - confirm */
+ uint64_t ei_addr;
+ QEMUBH *bh;
+ EventNotifier notifier;
+ bool ioeventfd_enabled;
+ NvmeRequest *io_req;
+ QTAILQ_HEAD(, NvmeRequest) req_list; /* two NvmeRequest lists, linked through NvmeRequest.entry */
+ QTAILQ_HEAD(, NvmeRequest) out_req_list;
+ QTAILQ_ENTRY(NvmeSQueue) entry; /* linkage for NvmeCQueue.sq_list */
+} NvmeSQueue;
+
+typedef struct NvmeCQueue { /* completion queue state */
+ struct NvmeCtrl *ctrl;
+ uint8_t phase;
+ uint16_t cqid;
+ uint16_t irq_enabled;
+ uint32_t head;
+ uint32_t tail;
+ uint32_t vector;
+ uint32_t size;
+ uint64_t dma_addr;
+ uint64_t db_addr; /* db_addr/ei_addr: presumably shadow-doorbell and event-index addresses - confirm */
+ uint64_t ei_addr;
+ QEMUBH *bh;
+ EventNotifier notifier;
+ bool ioeventfd_enabled;
+ QTAILQ_HEAD(, NvmeSQueue) sq_list; /* NvmeSQueue list, linked through NvmeSQueue.entry */
+ QTAILQ_HEAD(, NvmeRequest) req_list; /* NvmeRequest list, linked through NvmeRequest.entry */
+} NvmeCQueue;
+
+#define TYPE_NVME "nvme" /* type name string for the controller device */
+#define NVME(obj) \
+ OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) /* obj viewed as NvmeCtrl via OBJECT_CHECK; used by nvme_sctrl() */
+
+typedef struct NvmeParams { /* embedded in NvmeCtrl as .params */
+ char *serial;
+ uint32_t num_queues; /* deprecated since 5.1 */
+ uint32_t max_ioqpairs;
+ uint16_t msix_qsize;
+ uint32_t cmb_size_mb;
+ uint8_t aerl;
+ uint32_t aer_max_queued;
+ uint8_t mdts;
+ uint8_t vsl;
+ bool use_intel_id;
+ uint8_t zasl;
+ bool auto_transition_zones;
+ bool legacy_cmb;
+ bool ioeventfd;
+ uint8_t sriov_max_vfs; /* sriov_*: presumably SR-IOV resource settings - confirm in the device code */
+ uint16_t sriov_vq_flexible;
+ uint16_t sriov_vi_flexible;
+ uint8_t sriov_max_vq_per_vf;
+ uint8_t sriov_max_vi_per_vf;
+} NvmeParams;
+
+typedef struct NvmeCtrl { /* state of one TYPE_NVME controller device */
+ PCIDevice parent_obj; /* nvme_sctrl() takes its address to query the SR-IOV helpers */
+ MemoryRegion bar0;
+ MemoryRegion iomem;
+ NvmeBar bar;
+ NvmeParams params;
+ NvmeBus bus;
+
+ uint16_t cntlid;
+ bool qs_created;
+ uint32_t page_size;
+ uint16_t page_bits;
+ uint16_t max_prp_ents;
+ uint16_t cqe_size;
+ uint16_t sqe_size;
+ uint32_t max_q_ents;
+ uint8_t outstanding_aers;
+ uint32_t irq_status;
+ int cq_pending;
+ uint64_t host_timestamp; /* Timestamp sent by the host */
+ uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */
+ uint64_t starttime_ms;
+ uint16_t temperature;
+ uint8_t smart_critical_warning;
+ uint32_t conf_msix_qsize;
+ uint32_t conf_ioqpairs;
+ uint64_t dbbuf_dbs;
+ uint64_t dbbuf_eis;
+ bool dbbuf_enabled;
+
+ struct {
+ MemoryRegion mem;
+ uint8_t *buf;
+ bool cmse;
+ hwaddr cba;
+ } cmb;
+
+ struct {
+ HostMemoryBackend *dev;
+ bool cmse;
+ hwaddr cba;
+ } pmr;
+
+ uint8_t aer_mask;
+ NvmeRequest **aer_reqs;
+ QTAILQ_HEAD(, NvmeAsyncEvent) aer_queue; /* NvmeAsyncEvent list, linked through NvmeAsyncEvent.entry */
+ int aer_queued;
+
+ uint32_t dmrsl;
+
+ /* Namespace IDs start at 1, so the bitmap is sized to be indexed by NSID directly */
+#define NVME_CHANGED_NSID_SIZE (NVME_MAX_NAMESPACES + 1)
+ DECLARE_BITMAP(changed_nsids, NVME_CHANGED_NSID_SIZE);
+
+ NvmeSubsystem *subsys;
+
+ NvmeNamespace namespace;
+ NvmeNamespace *namespaces[NVME_MAX_NAMESPACES + 1]; /* indexed by NSID; read by nvme_ns() */
+ NvmeSQueue **sq;
+ NvmeCQueue **cq; /* indexed by cqid; read by nvme_cq() */
+ NvmeSQueue admin_sq;
+ NvmeCQueue admin_cq;
+ NvmeIdCtrl id_ctrl;
+
+ struct {
+ struct { /* anonymous: fields are accessed directly as features.temp_thresh_* */
+ uint16_t temp_thresh_hi;
+ uint16_t temp_thresh_low;
+ };
+
+ uint32_t async_config;
+ NvmeHostBehaviorSupport hbs;
+ } features;
+
+ NvmePriCtrlCap pri_ctrl_cap;
+ NvmeSecCtrlList sec_ctrl_list; /* searched by nvme_sctrl_for_cntlid(); indexed by VF number in nvme_sctrl() */
+ struct {
+ uint16_t vqrfap;
+ uint16_t virfap;
+ } next_pri_ctrl_cap; /* These override pri_ctrl_cap after reset */
+} NvmeCtrl;
+
+typedef enum NvmeResetType { /* not used within this header; presumably selects the scope of a reset - confirm */
+ NVME_RESET_FUNCTION = 0,
+ NVME_RESET_CONTROLLER = 1,
+} NvmeResetType;
+
+static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
+{
+    /* NSID 0 and anything above NVME_MAX_NAMESPACES have no table entry. */
+    if (nsid != 0 && nsid <= NVME_MAX_NAMESPACES) {
+        return n->namespaces[nsid];
+    }
+    return NULL;
+}
+
+static inline NvmeCQueue *nvme_cq(NvmeRequest *req)
+{
+    const NvmeSQueue *submit = req->sq;
+    NvmeCQueue **cq_table = submit->ctrl->cq;
+
+    return cq_table[submit->cqid]; /* the CQ named by the request's SQ */
+}
+
+static inline NvmeCtrl *nvme_ctrl(NvmeRequest *req)
+{
+    /* A request reaches its controller through its submission queue. */
+    return req->sq->ctrl;
+}
+
+static inline uint16_t nvme_cid(NvmeRequest *req)
+{
+    /* 0xffff is returned when there is no request at all. */
+    if (req != NULL) {
+        return le16_to_cpu(req->cqe.cid);
+    }
+    return 0xffff;
+}
+
+static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
+{
+    PCIDevice *dev = &n->parent_obj;
+    /* Resolved before the VF check; only dereferenced when dev is a VF. */
+    NvmeCtrl *owner = NVME(pcie_sriov_get_pf(dev));
+
+    if (!pci_is_vf(dev)) {
+        return NULL;
+    }
+    return &owner->sec_ctrl_list.sec[pcie_sriov_vf_number(dev)];
+}
+
+static inline NvmeSecCtrlEntry *nvme_sctrl_for_cntlid(NvmeCtrl *n,
+                                                      uint16_t cntlid)
+{
+    NvmeSecCtrlList *sctrls = &n->sec_ctrl_list;
+    uint8_t idx = 0;
+    /* Linear scan of the first numcntl entries for a matching scid. */
+    while (idx < sctrls->numcntl) {
+        NvmeSecCtrlEntry *candidate = &sctrls->sec[idx++];
+        if (le16_to_cpu(candidate->scid) == cntlid) {
+            return candidate;
+        }
+    }
+    return NULL;
+}
+
+void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns); /* controller helpers: declared only, implemented outside this header */
+uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
+ NvmeTxDirection dir, NvmeRequest *req); /* uint16_t result is presumably an NVMe status code - confirm */
+uint16_t nvme_bounce_mdata(NvmeCtrl *n, void *ptr, uint32_t len,
+ NvmeTxDirection dir, NvmeRequest *req); /* same signature as nvme_bounce_data() */
+void nvme_rw_complete_cb(void *opaque, int ret); /* (opaque, ret) shape suggests an AIO completion callback - confirm */
+uint16_t nvme_map_dptr(NvmeCtrl *n, NvmeSg *sg, size_t len,
+ NvmeCmd *cmd); /* uint16_t result is presumably an NVMe status code - confirm */
+
+#endif /* HW_NVME_NVME_H */