summary refs log tree commit diff stats
path: root/hw/mem
diff options
context:
space:
mode:
Diffstat (limited to 'hw/mem')
-rw-r--r-- hw/mem/Kconfig 18
-rw-r--r-- hw/mem/cxl_type3.c 647
-rw-r--r-- hw/mem/memory-device.c 346
-rw-r--r-- hw/mem/meson.build 10
-rw-r--r-- hw/mem/npcm7xx_mc.c 84
-rw-r--r-- hw/mem/nvdimm.c 282
-rw-r--r-- hw/mem/pc-dimm.c 312
-rw-r--r-- hw/mem/sparse-mem.c 150
-rw-r--r-- hw/mem/trace-events 8
-rw-r--r-- hw/mem/trace.h 1
10 files changed, 1858 insertions, 0 deletions
diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig
new file mode 100644
index 00000000..73c5ae8a
--- /dev/null
+++ b/hw/mem/Kconfig
@@ -0,0 +1,18 @@
+config DIMM
+ bool
+ select MEM_DEVICE
+
+config MEM_DEVICE
+ bool
+
+config NVDIMM
+ bool
+ select MEM_DEVICE
+
+config SPARSE_MEM
+ bool
+
+config CXL_MEM_DEVICE
+ bool
+ default y if CXL
+ select MEM_DEVICE
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
new file mode 100644
index 00000000..25559020
--- /dev/null
+++ b/hw/mem/cxl_type3.c
@@ -0,0 +1,647 @@
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/error-report.h"
+#include "hw/mem/memory-device.h"
+#include "hw/mem/pc-dimm.h"
+#include "hw/pci/pci.h"
+#include "hw/qdev-properties.h"
+#include "qapi/error.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/pmem.h"
+#include "qemu/range.h"
+#include "qemu/rcu.h"
+#include "sysemu/hostmem.h"
+#include "sysemu/numa.h"
+#include "hw/cxl/cxl.h"
+#include "hw/pci/msix.h"
+
+#define DWORD_BYTE 4
+
+/*
+ * Default CDAT entries for a memory region.
+ *
+ * Each enumerator is the slot in the per-region CDAT table that
+ * ct3_build_cdat_entries_for_mr() fills in; CT3_CDAT_NUM_ENTRIES is the
+ * total number of slots and is also what ct3_build_cdat_table() returns
+ * on success.
+ */
+enum {
+ CT3_CDAT_DSMAS,
+ CT3_CDAT_DSLBIS0,
+ CT3_CDAT_DSLBIS1,
+ CT3_CDAT_DSLBIS2,
+ CT3_CDAT_DSLBIS3,
+ CT3_CDAT_DSEMTS,
+ CT3_CDAT_NUM_ENTRIES
+};
+
+/*
+ * Fill @cdat_table with the default set of CDAT structures describing @mr.
+ *
+ * @cdat_table:   array with at least CT3_CDAT_NUM_ENTRIES slots; on return
+ *                each slot owns a freshly allocated structure that the
+ *                caller must release with g_free().
+ * @dsmad_handle: handle stored in every structure to tie them together.
+ * @mr:           memory region whose size is reported as the DPA length.
+ *
+ * Returns 0.  g_malloc() aborts the process on allocation failure instead
+ * of returning NULL, so there is no out-of-memory error path here; the
+ * int return type is kept for the benefit of existing callers.
+ */
+static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table,
+                                         int dsmad_handle, MemoryRegion *mr)
+{
+    g_autofree CDATDsmas *dsmas = NULL;
+    g_autofree CDATDslbis *dslbis0 = NULL;
+    g_autofree CDATDslbis *dslbis1 = NULL;
+    g_autofree CDATDslbis *dslbis2 = NULL;
+    g_autofree CDATDslbis *dslbis3 = NULL;
+    g_autofree CDATDsemts *dsemts = NULL;
+
+    dsmas = g_malloc(sizeof(*dsmas));
+    *dsmas = (CDATDsmas) {
+        .header = {
+            .type = CDAT_TYPE_DSMAS,
+            .length = sizeof(*dsmas),
+        },
+        .DSMADhandle = dsmad_handle,
+        .flags = CDAT_DSMAS_FLAG_NV,
+        .DPA_base = 0,
+        .DPA_length = int128_get64(mr->size),
+    };
+
+    /* For now, no memory side cache, plausiblish numbers */
+    dslbis0 = g_malloc(sizeof(*dslbis0));
+    *dslbis0 = (CDATDslbis) {
+        .header = {
+            .type = CDAT_TYPE_DSLBIS,
+            .length = sizeof(*dslbis0),
+        },
+        .handle = dsmad_handle,
+        .flags = HMAT_LB_MEM_MEMORY,
+        .data_type = HMAT_LB_DATA_READ_LATENCY,
+        .entry_base_unit = 10000, /* 10ns base */
+        .entry[0] = 15, /* 150ns */
+    };
+
+    dslbis1 = g_malloc(sizeof(*dslbis1));
+    *dslbis1 = (CDATDslbis) {
+        .header = {
+            .type = CDAT_TYPE_DSLBIS,
+            .length = sizeof(*dslbis1),
+        },
+        .handle = dsmad_handle,
+        .flags = HMAT_LB_MEM_MEMORY,
+        .data_type = HMAT_LB_DATA_WRITE_LATENCY,
+        .entry_base_unit = 10000,
+        .entry[0] = 25, /* 250ns */
+    };
+
+    dslbis2 = g_malloc(sizeof(*dslbis2));
+    *dslbis2 = (CDATDslbis) {
+        .header = {
+            .type = CDAT_TYPE_DSLBIS,
+            .length = sizeof(*dslbis2),
+        },
+        .handle = dsmad_handle,
+        .flags = HMAT_LB_MEM_MEMORY,
+        .data_type = HMAT_LB_DATA_READ_BANDWIDTH,
+        .entry_base_unit = 1000, /* GB/s */
+        .entry[0] = 16,
+    };
+
+    dslbis3 = g_malloc(sizeof(*dslbis3));
+    *dslbis3 = (CDATDslbis) {
+        .header = {
+            .type = CDAT_TYPE_DSLBIS,
+            .length = sizeof(*dslbis3),
+        },
+        .handle = dsmad_handle,
+        .flags = HMAT_LB_MEM_MEMORY,
+        .data_type = HMAT_LB_DATA_WRITE_BANDWIDTH,
+        .entry_base_unit = 1000, /* GB/s */
+        .entry[0] = 16,
+    };
+
+    dsemts = g_malloc(sizeof(*dsemts));
+    *dsemts = (CDATDsemts) {
+        .header = {
+            .type = CDAT_TYPE_DSEMTS,
+            .length = sizeof(*dsemts),
+        },
+        .DSMAS_handle = dsmad_handle,
+        /* Reserved - the non volatile from DSMAS matters */
+        .EFI_memory_type_attr = 2,
+        .DPA_offset = 0,
+        .DPA_length = int128_get64(mr->size),
+    };
+
+    /* Header always at start of structure; ownership moves to the table */
+    cdat_table[CT3_CDAT_DSMAS] = g_steal_pointer(&dsmas);
+    cdat_table[CT3_CDAT_DSLBIS0] = g_steal_pointer(&dslbis0);
+    cdat_table[CT3_CDAT_DSLBIS1] = g_steal_pointer(&dslbis1);
+    cdat_table[CT3_CDAT_DSLBIS2] = g_steal_pointer(&dslbis2);
+    cdat_table[CT3_CDAT_DSLBIS3] = g_steal_pointer(&dslbis3);
+    cdat_table[CT3_CDAT_DSEMTS] = g_steal_pointer(&dsemts);
+
+    return 0;
+}
+
+/*
+ * CDAT build callback: allocate and populate the default CDAT table for the
+ * device passed in @priv (a CXLType3Dev).
+ *
+ * On success *@cdat_table receives a newly allocated array of
+ * CT3_CDAT_NUM_ENTRIES entries (released by ct3_free_cdat_table()) and the
+ * number of entries is returned.  Returns 0, leaving *@cdat_table untouched,
+ * when the device has no host memory backend, and a negative errno value on
+ * failure.
+ */
+static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
+{
+    g_autofree CDATSubHeader **table = NULL;
+    MemoryRegion *nonvolatile_mr;
+    CXLType3Dev *ct3d = priv;
+    int dsmad_handle = 0;
+    int rc;
+
+    if (!ct3d->hostmem) {
+        return 0;
+    }
+
+    nonvolatile_mr = host_memory_backend_get_memory(ct3d->hostmem);
+    if (!nonvolatile_mr) {
+        return -EINVAL;
+    }
+
+    /*
+     * g_new0() aborts on allocation failure and checks the element-count
+     * multiplication for overflow, so no NULL check is needed.
+     */
+    table = g_new0(CDATSubHeader *, CT3_CDAT_NUM_ENTRIES);
+
+    rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, nonvolatile_mr);
+    if (rc < 0) {
+        /* g_autofree releases the (still empty) pointer array */
+        return rc;
+    }
+
+    *cdat_table = g_steal_pointer(&table);
+
+    return CT3_CDAT_NUM_ENTRIES;
+}
+
+/*
+ * CDAT free callback: release the first @num entries of @cdat_table and then
+ * the pointer array itself.  @priv is unused.
+ */
+static void ct3_free_cdat_table(CDATSubHeader **cdat_table, int num, void *priv)
+{
+    CDATSubHeader **slot = cdat_table;
+    int remaining = num;
+
+    while (remaining-- > 0) {
+        g_free(*slot++);
+    }
+    g_free(cdat_table);
+}
+
+/*
+ * DOE handler for CXL table access (CDAT) requests.
+ *
+ * Reads the request from the DOE write mailbox, copies a response header
+ * followed by the requested CDAT entry into the read mailbox and grows
+ * read_mbox_len by the response length (in dwords).
+ *
+ * Returns true when a response was produced, false when the request is
+ * discarded: either it is shorter than a CDATReq or it names an entry
+ * handle outside the CDAT table.
+ */
+static bool cxl_doe_cdat_rsp(DOECap *doe_cap)
+{
+    CDATObject *cdat = &CXL_TYPE3(doe_cap->pdev)->cxl_cstate.cdat;
+    uint16_t ent;
+    void *base;
+    uint32_t len;
+    CDATReq *req = pcie_doe_get_write_mbox_ptr(doe_cap);
+    CDATRsp rsp;
+
+    assert(cdat->entry_len);
+
+    /* Discard if request length mismatched */
+    if (pcie_doe_get_obj_len(req) <
+        DIV_ROUND_UP(sizeof(CDATReq), DWORD_BYTE)) {
+        return false;
+    }
+
+    /*
+     * The entry handle comes straight from the request, so it must be
+     * range-checked before it is used to index the entry array.
+     */
+    ent = req->entry_handle;
+    if (ent >= cdat->entry_len) {
+        return false;
+    }
+    base = cdat->entry[ent].base;
+    len = cdat->entry[ent].length;
+
+    rsp = (CDATRsp) {
+        .header = {
+            .vendor_id = CXL_VENDOR_ID,
+            .data_obj_type = CXL_DOE_TABLE_ACCESS,
+            .reserved = 0x0,
+            .length = DIV_ROUND_UP((sizeof(rsp) + len), DWORD_BYTE),
+        },
+        .rsp_code = CXL_DOE_TAB_RSP,
+        .table_type = CXL_DOE_TAB_TYPE_CDAT,
+        /* Handle of the next entry, or the end marker after the last one */
+        .entry_handle = (ent < cdat->entry_len - 1) ?
+                        ent + 1 : CXL_DOE_TAB_ENT_MAX,
+    };
+
+    memcpy(doe_cap->read_mbox, &rsp, sizeof(rsp));
+    memcpy(doe_cap->read_mbox + DIV_ROUND_UP(sizeof(rsp), DWORD_BYTE),
+           base, len);
+
+    doe_cap->read_mbox_len += rsp.header.length;
+
+    return true;
+}
+
+/*
+ * PCI config space read hook: let the CDAT DOE capability answer first and
+ * fall back to the default PCI config read when it does not claim the access.
+ */
+static uint32_t ct3d_config_read(PCIDevice *pci_dev, uint32_t addr, int size)
+{
+    CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
+    uint32_t doe_val;
+
+    if (!pcie_doe_read_config(&ct3d->doe_cdat, addr, size, &doe_val)) {
+        return pci_default_read_config(pci_dev, addr, size);
+    }
+
+    return doe_val;
+}
+
+/*
+ * PCI config space write hook: forward the write to the CDAT DOE capability
+ * and then to the default PCI config write handler, in that order.
+ */
+static void ct3d_config_write(PCIDevice *pci_dev, uint32_t addr, uint32_t val,
+                              int size)
+{
+    DOECap *doe = &CXL_TYPE3(pci_dev)->doe_cdat;
+
+    pcie_doe_write_config(doe, addr, val, size);
+    pci_default_write_config(pci_dev, addr, val, size);
+}
+
+/*
+ * Null value of all Fs suggested by IEEE RA guidelines for use of
+ * EU, OUI and CID.
+ *
+ * The expansion is fully parenthesized so the macro stays a single operand
+ * whatever expression it is placed in.
+ */
+#define UI64_NULL (~0ULL)
+
+/*
+ * Create the three DVSEC capabilities exposed by the device, in order:
+ * the CXL device DVSEC (range 1 size taken from the host memory backend),
+ * the register locator DVSEC and the GPF device DVSEC.
+ */
+static void build_dvsecs(CXLType3Dev *ct3d)
+{
+    CXLComponentState *cstate = &ct3d->cxl_cstate;
+    CXLDVSECDevice device_dvsec = {
+        .cap = 0x1e,
+        .ctrl = 0x2,
+        .status2 = 0x2,
+        .range1_size_hi = ct3d->hostmem->size >> 32,
+        .range1_size_lo = (2 << 5) | (2 << 2) | 0x3 |
+                          (ct3d->hostmem->size & 0xF0000000),
+        .range1_base_hi = 0,
+        .range1_base_lo = 0,
+    };
+    CXLDVSECRegisterLocator reg_loc_dvsec = {
+        .rsvd = 0,
+        .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX,
+        .reg0_base_hi = 0,
+        .reg1_base_lo = RBI_CXL_DEVICE_REG | CXL_DEVICE_REG_BAR_IDX,
+        .reg1_base_hi = 0,
+    };
+    CXLDVSECDeviceGPF gpf_dvsec = {
+        .phase2_duration = 0x603, /* 3 seconds */
+        .phase2_power = 0x33, /* 0x33 milliwatts */
+    };
+
+    cxl_component_create_dvsec(cstate, CXL2_TYPE3_DEVICE,
+                               PCIE_CXL_DEVICE_DVSEC_LENGTH,
+                               PCIE_CXL_DEVICE_DVSEC,
+                               PCIE_CXL2_DEVICE_DVSEC_REVID,
+                               (uint8_t *)&device_dvsec);
+
+    cxl_component_create_dvsec(cstate, CXL2_TYPE3_DEVICE,
+                               REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC,
+                               REG_LOC_DVSEC_REVID,
+                               (uint8_t *)&reg_loc_dvsec);
+
+    cxl_component_create_dvsec(cstate, CXL2_TYPE3_DEVICE,
+                               GPF_DEVICE_DVSEC_LENGTH, GPF_DEVICE_DVSEC,
+                               GPF_DEVICE_DVSEC_REVID,
+                               (uint8_t *)&gpf_dvsec);
+}
+
+/*
+ * Mark HDM decoder @which as committed: clear the COMMIT and ERR fields of
+ * its control register and set COMMITTED.  Only decoder 0 is supported.
+ */
+static void hdm_decoder_commit(CXLType3Dev *ct3d, int which)
+{
+    uint32_t *hdm_regs = ct3d->cxl_cstate.crb.cache_mem_registers;
+
+    assert(which == 0);
+
+    /* TODO: Sanity checks that the decoder is possible */
+    ARRAY_FIELD_DP32(hdm_regs, CXL_HDM_DECODER0_CTRL, COMMIT, 0);
+    ARRAY_FIELD_DP32(hdm_regs, CXL_HDM_DECODER0_CTRL, ERR, 0);
+
+    ARRAY_FIELD_DP32(hdm_regs, CXL_HDM_DECODER0_CTRL, COMMITTED, 1);
+}
+
+/*
+ * Write handler for the cache/mem component registers.
+ *
+ * Stores the 32-bit @value little-endian at @offset in the register array.
+ * If the write targets the HDM decoder 0 control register with the COMMIT
+ * field set, the decoder is committed after the store.
+ */
+static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value,
+                           unsigned size)
+{
+    CXLComponentState *cxl_cstate = opaque;
+    CXLType3Dev *ct3d = container_of(cxl_cstate, CXLType3Dev, cxl_cstate);
+    uint8_t *reg_bytes = (uint8_t *)cxl_cstate->crb.cache_mem_registers;
+    int decoder_to_commit = -1; /* -1: no commit requested */
+
+    assert(size == 4);
+    g_assert(offset < CXL2_COMPONENT_CM_REGION_SIZE);
+
+    if (offset == A_CXL_HDM_DECODER0_CTRL &&
+        FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT)) {
+        decoder_to_commit = 0;
+    }
+
+    stl_le_p(reg_bytes + offset, value);
+
+    if (decoder_to_commit >= 0) {
+        hdm_decoder_commit(ct3d, decoder_to_commit);
+    }
+}
+
+/*
+ * Validate the memory-related properties and set up the device's backing
+ * memory.
+ *
+ * All property checks are done before anything is modified, so a failure
+ * leaves no initialized address space or mapped backend behind.  On success
+ * the backend region is marked non-volatile, enabled and mapped, the
+ * device-private address space is initialized on top of it and pmem_size is
+ * recorded.
+ *
+ * Returns true on success; on failure sets @errp and returns false.
+ */
+static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
+{
+    DeviceState *ds = DEVICE(ct3d);
+    MemoryRegion *mr;
+    char *name;
+
+    if (!ct3d->hostmem) {
+        error_setg(errp, "memdev property must be set");
+        return false;
+    }
+
+    mr = host_memory_backend_get_memory(ct3d->hostmem);
+    if (!mr) {
+        error_setg(errp, "memdev property must be set");
+        return false;
+    }
+
+    /* Checked before any side effect so that failing needs no unwinding */
+    if (!ct3d->lsa) {
+        error_setg(errp, "lsa property must be set");
+        return false;
+    }
+
+    memory_region_set_nonvolatile(mr, true);
+    memory_region_set_enabled(mr, true);
+    host_memory_backend_set_mapped(ct3d->hostmem, true);
+
+    if (ds->id) {
+        name = g_strdup_printf("cxl-type3-dpa-space:%s", ds->id);
+    } else {
+        name = g_strdup("cxl-type3-dpa-space");
+    }
+    address_space_init(&ct3d->hostmem_as, mr, name);
+    g_free(name);
+
+    ct3d->cxl_dstate.pmem_size = ct3d->hostmem->size;
+
+    return true;
+}
+
+/*
+ * DOE protocols served by the CDAT mailbox: CXL table access requests are
+ * handled by cxl_doe_cdat_rsp().  The empty entry terminates the list.
+ */
+static DOEProtocol doe_cdat_prot[] = {
+ { CXL_VENDOR_ID, CXL_DOE_TABLE_ACCESS, cxl_doe_cdat_rsp },
+ { }
+};
+
+/*
+ * PCI realize callback for the CXL type 3 device.
+ *
+ * Sets up the backing memory, PCI class and capabilities, the DVSECs, the
+ * component and device register BARs, MSI-X and the CDAT DOE mailbox.
+ * Returns early with @errp set if cxl_setup_memory() fails.
+ */
+static void ct3_realize(PCIDevice *pci_dev, Error **errp)
+{
+ CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
+ CXLComponentState *cxl_cstate = &ct3d->cxl_cstate;
+ ComponentRegisters *regs = &cxl_cstate->crb;
+ MemoryRegion *mr = &regs->component_registers;
+ uint8_t *pci_conf = pci_dev->config;
+ unsigned short msix_num = 1;
+ int i;
+
+ if (!cxl_setup_memory(ct3d, errp)) {
+ return;
+ }
+
+ pci_config_set_prog_interface(pci_conf, 0x10);
+ pci_config_set_class(pci_conf, PCI_CLASS_MEMORY_CXL);
+
+ pcie_endpoint_cap_init(pci_dev, 0x80);
+ /*
+ * A serial number capability is only added when "sn" was set; the DVSECs
+ * then start 0x0c bytes later (presumably the size of that capability -
+ * TODO confirm).
+ */
+ if (ct3d->sn != UI64_NULL) {
+ pcie_dev_ser_num_init(pci_dev, 0x100, ct3d->sn);
+ cxl_cstate->dvsec_offset = 0x100 + 0x0c;
+ } else {
+ cxl_cstate->dvsec_offset = 0x100;
+ }
+
+ ct3d->cxl_cstate.pdev = pci_dev;
+ build_dvsecs(ct3d);
+
+ /* Freed again in ct3_exit() */
+ regs->special_ops = g_new0(MemoryRegionOps, 1);
+ regs->special_ops->write = ct3d_reg_write;
+
+ cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate,
+ TYPE_CXL_TYPE3);
+
+ pci_register_bar(
+ pci_dev, CXL_COMPONENT_REG_BAR_IDX,
+ PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, mr);
+
+ cxl_device_register_block_init(OBJECT(pci_dev), &ct3d->cxl_dstate);
+ pci_register_bar(pci_dev, CXL_DEVICE_REG_BAR_IDX,
+ PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_64,
+ &ct3d->cxl_dstate.device_registers);
+
+ /* MSI(-X) Initialization */
+ /*
+ * NOTE(review): the return value is ignored and no Error pointer is
+ * passed, so an MSI-X setup failure goes unnoticed here.
+ */
+ msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL);
+ for (i = 0; i < msix_num; i++) {
+ msix_vector_use(pci_dev, i);
+ }
+
+ /* DOE Initialization */
+ pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true, 0);
+
+ cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table;
+ cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table;
+ cxl_cstate->cdat.private = ct3d;
+ /*
+ * NOTE(review): a failure is reported through errp only; nothing set up
+ * earlier in this function is released on that path.
+ */
+ cxl_doe_cdat_init(cxl_cstate, errp);
+}
+
+/*
+ * PCI exit callback: release the CDAT state, free the register write ops
+ * allocated in ct3_realize() and destroy the device-private address space.
+ */
+static void ct3_exit(PCIDevice *pci_dev)
+{
+    CXLType3Dev *ct3d = CXL_TYPE3(pci_dev);
+
+    cxl_doe_cdat_release(&ct3d->cxl_cstate);
+    g_free(ct3d->cxl_cstate.crb.special_ops);
+    address_space_destroy(&ct3d->hostmem_as);
+}
+
+/*
+ * Translate host physical address @host_addr into a device physical address
+ * using HDM decoder 0.
+ *
+ * Returns false when @host_addr lies outside the window programmed into the
+ * decoder's base/size registers.  Otherwise stores the translated address in
+ * *@dpa and returns true: the low (8 + IG) bits of the window offset are
+ * kept, the next IW bits are dropped and the remaining upper bits are
+ * shifted down by IW.
+ *
+ * TODO: Support multiple HDM decoders and DPA skip
+ */
+static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa)
+{
+    uint32_t *hdm_regs = ct3d->cxl_cstate.crb.cache_mem_registers;
+    uint64_t hpa = (uint64_t)host_addr;
+    uint64_t window_base, window_size, offset;
+    uint32_t ctrl;
+    int ig, iw, low_bits, high_start;
+
+    window_base = ((uint64_t)hdm_regs[R_CXL_HDM_DECODER0_BASE_HI] << 32) |
+                  hdm_regs[R_CXL_HDM_DECODER0_BASE_LO];
+    window_size = ((uint64_t)hdm_regs[R_CXL_HDM_DECODER0_SIZE_HI] << 32) |
+                  hdm_regs[R_CXL_HDM_DECODER0_SIZE_LO];
+
+    if (hpa < window_base) {
+        return false;
+    }
+
+    offset = hpa - window_base;
+    if (offset >= window_size) {
+        return false;
+    }
+
+    ctrl = hdm_regs[R_CXL_HDM_DECODER0_CTRL];
+    iw = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IW);
+    ig = FIELD_EX32(ctrl, CXL_HDM_DECODER0_CTRL, IG);
+
+    low_bits = 8 + ig;            /* bits kept in place */
+    high_start = low_bits + iw;   /* first bit above the dropped field */
+
+    *dpa = (offset & MAKE_64BIT_MASK(0, low_bits)) |
+           ((offset & MAKE_64BIT_MASK(high_start, 64 - high_start)) >> iw);
+
+    return true;
+}
+
+/*
+ * Read @size bytes at host physical address @host_addr from the device's
+ * backing memory into *@data.
+ *
+ * Returns MEMTX_ERROR when the backend has no memory region, when
+ * @host_addr is not covered by the HDM decoder, or when any byte of the
+ * access [dpa, dpa + size) would fall outside the backing region.
+ * Otherwise returns the result of the address space read.
+ */
+MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data,
+                           unsigned size, MemTxAttrs attrs)
+{
+    CXLType3Dev *ct3d = CXL_TYPE3(d);
+    uint64_t dpa_offset, mr_size;
+    MemoryRegion *mr;
+
+    /* TODO support volatile region */
+    mr = host_memory_backend_get_memory(ct3d->hostmem);
+    if (!mr) {
+        return MEMTX_ERROR;
+    }
+
+    if (!cxl_type3_dpa(ct3d, host_addr, &dpa_offset)) {
+        return MEMTX_ERROR;
+    }
+
+    /*
+     * Valid offsets are 0 .. mr_size - 1 and the whole access has to fit.
+     * The subtraction form cannot overflow.
+     */
+    mr_size = int128_get64(mr->size);
+    if (dpa_offset >= mr_size || size > mr_size - dpa_offset) {
+        return MEMTX_ERROR;
+    }
+
+    return address_space_read(&ct3d->hostmem_as, dpa_offset, attrs, data, size);
+}
+
+/*
+ * Write the low @size bytes of @data to the device's backing memory at host
+ * physical address @host_addr.
+ *
+ * Writes that cannot be serviced are dropped and reported as MEMTX_OK: no
+ * memory region behind the backend, @host_addr not covered by the HDM
+ * decoder, or any byte of the access [dpa, dpa + size) outside the backing
+ * region.  Otherwise returns the result of the address space write.
+ */
+MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data,
+                            unsigned size, MemTxAttrs attrs)
+{
+    CXLType3Dev *ct3d = CXL_TYPE3(d);
+    uint64_t dpa_offset, mr_size;
+    MemoryRegion *mr;
+
+    mr = host_memory_backend_get_memory(ct3d->hostmem);
+    if (!mr) {
+        return MEMTX_OK;
+    }
+
+    if (!cxl_type3_dpa(ct3d, host_addr, &dpa_offset)) {
+        return MEMTX_OK;
+    }
+
+    /*
+     * Valid offsets are 0 .. mr_size - 1 and the whole access has to fit.
+     * The subtraction form cannot overflow.
+     */
+    mr_size = int128_get64(mr->size);
+    if (dpa_offset >= mr_size || size > mr_size - dpa_offset) {
+        return MEMTX_OK;
+    }
+
+    return address_space_write(&ct3d->hostmem_as, dpa_offset, attrs,
+                               &data, size);
+}
+
+/*
+ * Device reset: re-initialize the component (cache/mem) registers together
+ * with their write mask, then the CXL device registers.
+ */
+static void ct3d_reset(DeviceState *dev)
+{
+    CXLType3Dev *ct3d = CXL_TYPE3(dev);
+    ComponentRegisters *crb = &ct3d->cxl_cstate.crb;
+
+    cxl_component_register_init_common(crb->cache_mem_registers,
+                                       crb->cache_mem_regs_write_mask,
+                                       CXL2_TYPE3_DEVICE);
+    cxl_device_register_init_common(&ct3d->cxl_dstate);
+}
+
+/*
+ * User-configurable properties:
+ *   memdev - link to the memory backend providing the device memory
+ *   lsa    - link to the memory backend holding the label storage area
+ *   sn     - serial number; UI64_NULL (the default) means "not set"
+ *   cdat   - string stored as the CDAT filename
+ */
+static Property ct3_props[] = {
+ DEFINE_PROP_LINK("memdev", CXLType3Dev, hostmem, TYPE_MEMORY_BACKEND,
+ HostMemoryBackend *),
+ DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND,
+ HostMemoryBackend *),
+ DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL),
+ DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/* Return the size of the memory region backing the label storage area. */
+static uint64_t get_lsa_size(CXLType3Dev *ct3d)
+{
+    return memory_region_size(host_memory_backend_get_memory(ct3d->lsa));
+}
+
+/*
+ * Assert that the access [offset, offset + size) lies within @mr.
+ *
+ * The first assertion checks the end against the region size; the second
+ * catches unsigned wrap-around of offset + size.
+ * NOTE(review): the second assertion also fires for size == 0 - confirm
+ * callers never pass a zero-length access.
+ */
+static void validate_lsa_access(MemoryRegion *mr, uint64_t size,
+ uint64_t offset)
+{
+ assert(offset + size <= memory_region_size(mr));
+ assert(offset + size > offset);
+}
+
+/*
+ * Copy @size bytes starting at @offset of the label storage area into @buf.
+ * The access is validated (by assertion) against the LSA region first.
+ * Returns @size.
+ */
+static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size,
+                        uint64_t offset)
+{
+    MemoryRegion *lsa_mr = host_memory_backend_get_memory(ct3d->lsa);
+    const uint8_t *src;
+
+    validate_lsa_access(lsa_mr, size, offset);
+
+    src = (const uint8_t *)memory_region_get_ram_ptr(lsa_mr) + offset;
+    memcpy(buf, src, size);
+
+    return size;
+}
+
+/*
+ * Copy @size bytes from @buf into the label storage area at @offset and mark
+ * the written range dirty.  The access is validated (by assertion) against
+ * the LSA region first.
+ */
+static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size,
+                    uint64_t offset)
+{
+    MemoryRegion *lsa_mr = host_memory_backend_get_memory(ct3d->lsa);
+    uint8_t *dst;
+
+    validate_lsa_access(lsa_mr, size, offset);
+
+    dst = (uint8_t *)memory_region_get_ram_ptr(lsa_mr) + offset;
+    memcpy(dst, buf, size);
+    memory_region_set_dirty(lsa_mr, offset, size);
+
+    /*
+     * Just like the PMEM, if the guest is not allowed to exit gracefully, label
+     * updates will get lost.
+     */
+}
+
+/*
+ * Class initializer: installs the PCI identity and callbacks, the device
+ * description, reset handler and properties, and the label storage area
+ * accessors of the CXL type 3 class.
+ */
+static void ct3_class_init(ObjectClass *oc, void *data)
+{
+    CXLType3Class *ct3_class = CXL_TYPE3_CLASS(oc);
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(oc);
+    DeviceClass *dev_class = DEVICE_CLASS(oc);
+
+    /* PCI identity */
+    pci_class->class_id = PCI_CLASS_STORAGE_EXPRESS;
+    pci_class->vendor_id = PCI_VENDOR_ID_INTEL;
+    pci_class->device_id = 0xd93; /* LVF for now */
+    pci_class->revision = 1;
+
+    /* PCI lifecycle and config space hooks */
+    pci_class->realize = ct3_realize;
+    pci_class->exit = ct3_exit;
+    pci_class->config_read = ct3d_config_read;
+    pci_class->config_write = ct3d_config_write;
+
+    /* Generic device setup */
+    set_bit(DEVICE_CATEGORY_STORAGE, dev_class->categories);
+    dev_class->desc = "CXL PMEM Device (Type 3)";
+    dev_class->reset = ct3d_reset;
+    device_class_set_props(dev_class, ct3_props);
+
+    /* Label storage area accessors */
+    ct3_class->get_lsa_size = get_lsa_size;
+    ct3_class->get_lsa = get_lsa;
+    ct3_class->set_lsa = set_lsa;
+}
+
+/*
+ * QOM type description: a PCI device with its own class structure that
+ * implements the CXL device and PCIe device interfaces.
+ */
+static const TypeInfo ct3d_info = {
+ .name = TYPE_CXL_TYPE3,
+ .parent = TYPE_PCI_DEVICE,
+ .class_size = sizeof(struct CXLType3Class),
+ .class_init = ct3_class_init,
+ .instance_size = sizeof(CXLType3Dev),
+ .interfaces = (InterfaceInfo[]) {
+ { INTERFACE_CXL_DEVICE },
+ { INTERFACE_PCIE_DEVICE },
+ {}
+ },
+};
+
+/* Register the CXL type 3 device type. */
+static void ct3d_registers(void)
+{
+ type_register_static(&ct3d_info);
+}
+
+type_init(ct3d_registers);
diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c
new file mode 100644
index 00000000..d9f83017
--- /dev/null
+++ b/hw/mem/memory-device.c
@@ -0,0 +1,346 @@
+/*
+ * Memory Device Interface
+ *
+ * Copyright ProfitBricks GmbH 2012
+ * Copyright (C) 2014 Red Hat Inc
+ * Copyright (c) 2018 Red Hat Inc
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/mem/memory-device.h"
+#include "qapi/error.h"
+#include "hw/boards.h"
+#include "qemu/range.h"
+#include "hw/virtio/vhost.h"
+#include "sysemu/kvm.h"
+#include "trace.h"
+
+/*
+ * GCompareFunc ordering two memory devices by ascending address.
+ * Returns -1, 0 or 1.
+ */
+static gint memory_device_addr_sort(gconstpointer a, gconstpointer b)
+{
+    const MemoryDeviceState *lhs = MEMORY_DEVICE(a);
+    const MemoryDeviceState *rhs = MEMORY_DEVICE(b);
+    const uint64_t lhs_addr = MEMORY_DEVICE_GET_CLASS(a)->get_addr(lhs);
+    const uint64_t rhs_addr = MEMORY_DEVICE_GET_CLASS(b)->get_addr(rhs);
+
+    return (lhs_addr > rhs_addr) - (lhs_addr < rhs_addr);
+}
+
+/*
+ * object_child_foreach() callback: insert @obj into the address-sorted
+ * GSList pointed to by @opaque if it is a realized memory device, then
+ * recurse into its children.  Always returns 0 so the walk continues.
+ */
+static int memory_device_build_list(Object *obj, void *opaque)
+{
+    GSList **sorted = opaque;
+
+    /* only realized memory devices matter */
+    if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE) &&
+        DEVICE(obj)->realized) {
+        *sorted = g_slist_insert_sorted(*sorted, DEVICE(obj),
+                                        memory_device_addr_sort);
+    }
+
+    object_child_foreach(obj, memory_device_build_list, opaque);
+    return 0;
+}
+
+/*
+ * object_child_foreach() callback: add the region size of @obj to the
+ * uint64_t accumulator in @opaque if it is a realized memory device, then
+ * recurse into its children.  Always returns 0.
+ */
+static int memory_device_used_region_size(Object *obj, void *opaque)
+{
+    uint64_t *total = opaque;
+
+    if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE) &&
+        DEVICE(obj)->realized) {
+        *total += memory_device_get_region_size(MEMORY_DEVICE(obj),
+                                                &error_abort);
+    }
+
+    object_child_foreach(obj, memory_device_used_region_size, opaque);
+    return 0;
+}
+
+/*
+ * Check whether a memory device of @size bytes can still be added to @ms.
+ *
+ * Sets @errp when KVM or a vhost backend has no free memory slot, or when
+ * the regions already in use plus @size would overflow or exceed
+ * maxram_size - ram_size.
+ */
+static void memory_device_check_addable(MachineState *ms, uint64_t size,
+                                        Error **errp)
+{
+    uint64_t used_region_size = 0;
+    uint64_t wanted;
+
+    /* we will need a new memory slot for kvm and vhost */
+    if (kvm_enabled() && !kvm_has_free_slot(ms)) {
+        error_setg(errp, "hypervisor has no free memory slots left");
+        return;
+    }
+    if (!vhost_has_free_slot()) {
+        error_setg(errp, "a used vhost backend has no free memory slots left");
+        return;
+    }
+
+    /* will we exceed the total amount of memory specified */
+    memory_device_used_region_size(OBJECT(ms), &used_region_size);
+    wanted = used_region_size + size;
+    if (wanted < used_region_size ||
+        wanted > ms->maxram_size - ms->ram_size) {
+        error_setg(errp, "not enough space, currently 0x%" PRIx64
+                   " in use of total space for memory devices 0x" RAM_ADDR_FMT,
+                   used_region_size, ms->maxram_size - ms->ram_size);
+    }
+}
+
+/*
+ * Find an address in the machine's device memory area for a new memory
+ * device of @size bytes aligned to @align.
+ *
+ * @hint: if non-NULL, the exact address requested; the function then only
+ *        verifies that this range is usable.  If NULL, the lowest suitable
+ *        address is searched for.
+ *
+ * Returns the chosen address.  On failure @errp is set; the early failure
+ * paths return 0, the later ones return the lower bound of the candidate
+ * range, so callers must check @errp rather than the return value.
+ */
+static uint64_t memory_device_get_free_addr(MachineState *ms,
+ const uint64_t *hint,
+ uint64_t align, uint64_t size,
+ Error **errp)
+{
+ Error *err = NULL;
+ GSList *list = NULL, *item;
+ Range as, new = range_empty;
+
+ if (!ms->device_memory) {
+ error_setg(errp, "memory devices (e.g. for memory hotplug) are not "
+ "supported by the machine");
+ return 0;
+ }
+
+ if (!memory_region_size(&ms->device_memory->mr)) {
+ error_setg(errp, "memory devices (e.g. for memory hotplug) are not "
+ "enabled, please specify the maxmem option");
+ return 0;
+ }
+ /* 'as' is the whole range available for memory devices */
+ range_init_nofail(&as, ms->device_memory->base,
+ memory_region_size(&ms->device_memory->mr));
+
+ /* start of address space indicates the maximum alignment we expect */
+ if (!QEMU_IS_ALIGNED(range_lob(&as), align)) {
+ warn_report("the alignment (0x%" PRIx64 ") exceeds the expected"
+ " maximum alignment, memory will get fragmented and not"
+ " all 'maxmem' might be usable for memory devices.",
+ align);
+ }
+
+ memory_device_check_addable(ms, size, &err);
+ if (err) {
+ error_propagate(errp, err);
+ return 0;
+ }
+
+ if (hint && !QEMU_IS_ALIGNED(*hint, align)) {
+ error_setg(errp, "address must be aligned to 0x%" PRIx64 " bytes",
+ align);
+ return 0;
+ }
+
+ if (!QEMU_IS_ALIGNED(size, align)) {
+ error_setg(errp, "backend memory size must be multiple of 0x%"
+ PRIx64, align);
+ return 0;
+ }
+
+ /* Initial candidate: the hinted range, or the aligned start of 'as' */
+ if (hint) {
+ if (range_init(&new, *hint, size) || !range_contains_range(&as, &new)) {
+ error_setg(errp, "can't add memory device [0x%" PRIx64 ":0x%" PRIx64
+ "], usable range for memory devices [0x%" PRIx64 ":0x%"
+ PRIx64 "]", *hint, size, range_lob(&as),
+ range_size(&as));
+ return 0;
+ }
+ } else {
+ if (range_init(&new, QEMU_ALIGN_UP(range_lob(&as), align), size)) {
+ error_setg(errp, "can't add memory device, device too big");
+ return 0;
+ }
+ }
+
+ /* find address range that will fit new memory device */
+ object_child_foreach(OBJECT(ms), memory_device_build_list, &list);
+ /* The list is sorted by address, so devices are visited low to high */
+ for (item = list; item; item = g_slist_next(item)) {
+ const MemoryDeviceState *md = item->data;
+ const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(OBJECT(md));
+ uint64_t next_addr;
+ Range tmp;
+
+ range_init_nofail(&tmp, mdc->get_addr(md),
+ memory_device_get_region_size(md, &error_abort));
+
+ if (range_overlaps_range(&tmp, &new)) {
+ /* A hinted address cannot be moved, so an overlap is fatal */
+ if (hint) {
+ const DeviceState *d = DEVICE(md);
+ error_setg(errp, "address range conflicts with memory device"
+ " id='%s'", d->id ? d->id : "(unnamed)");
+ goto out;
+ }
+
+ /* Retry right behind the conflicting device */
+ next_addr = QEMU_ALIGN_UP(range_upb(&tmp) + 1, align);
+ if (!next_addr || range_init(&new, next_addr, range_size(&new))) {
+ /* Wrapped around or range not representable: give up */
+ range_make_empty(&new);
+ break;
+ }
+ } else if (range_lob(&tmp) > range_upb(&new)) {
+ /* Every remaining device starts above the candidate */
+ break;
+ }
+ }
+
+ if (!range_contains_range(&as, &new)) {
+ error_setg(errp, "could not find position in guest address space for "
+ "memory device - memory fragmented due to alignments");
+ }
+out:
+ g_slist_free(list);
+ return range_lob(&new);
+}
+
+/*
+ * Build a QAPI list describing every realized memory device below the
+ * machine object, ordered by device address.  The caller owns the result.
+ */
+MemoryDeviceInfoList *qmp_memory_device_list(void)
+{
+    MemoryDeviceInfoList *head = NULL, **tail = &head;
+    GSList *devices = NULL;
+    GSList *node;
+
+    object_child_foreach(qdev_get_machine(), memory_device_build_list,
+                         &devices);
+
+    node = devices;
+    while (node) {
+        const MemoryDeviceState *md = MEMORY_DEVICE(node->data);
+        const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(node->data);
+        MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1);
+
+        mdc->fill_device_info(md, info);
+        QAPI_LIST_APPEND(tail, info);
+
+        node = node->next;
+    }
+
+    g_slist_free(devices);
+
+    return head;
+}
+
+/*
+ * Add the plugged size of @obj to the uint64_t accumulator in @opaque if it
+ * is a realized memory device, then recurse into its children.
+ * Always returns 0.
+ */
+static int memory_device_plugged_size(Object *obj, void *opaque)
+{
+    uint64_t *total = opaque;
+
+    if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE) &&
+        DEVICE(obj)->realized) {
+        const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(obj);
+
+        *total += mdc->get_plugged_size(MEMORY_DEVICE(obj), &error_abort);
+    }
+
+    object_child_foreach(obj, memory_device_plugged_size, opaque);
+    return 0;
+}
+
+/*
+ * Return the sum of the plugged sizes of all realized memory devices found
+ * at or below the machine object.
+ */
+uint64_t get_plugged_memory_size(void)
+{
+    uint64_t plugged_total = 0;
+
+    memory_device_plugged_size(qdev_get_machine(), &plugged_total);
+
+    return plugged_total;
+}
+
+/*
+ * Pre-plug handling for a memory device: determine the alignment, find a
+ * free address in the machine's device memory area and store it in the
+ * device.
+ *
+ * @legacy_align: if non-NULL, the alignment to use verbatim; otherwise the
+ *                larger of the device's minimum alignment (when the class
+ *                provides one) and the memory region's alignment is used.
+ *
+ * A device address of 0 means "no address requested".  Errors are reported
+ * through @errp.
+ */
+void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
+                            const uint64_t *legacy_align, Error **errp)
+{
+    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+    Error *local_err = NULL;
+    uint64_t requested, addr, align = 0;
+    MemoryRegion *mr;
+
+    mr = mdc->get_memory_region(md, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    if (legacy_align) {
+        align = *legacy_align;
+    } else {
+        if (mdc->get_min_alignment) {
+            align = mdc->get_min_alignment(md);
+        }
+        align = MAX(align, memory_region_get_alignment(mr));
+    }
+
+    requested = mdc->get_addr(md);
+    addr = memory_device_get_free_addr(ms, requested ? &requested : NULL,
+                                       align, memory_region_size(mr),
+                                       &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    mdc->set_addr(md, addr, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    trace_memory_device_pre_plug(DEVICE(md)->id ? DEVICE(md)->id : "",
+                                 addr);
+}
+
+/*
+ * Map the memory region of @md into the machine's device memory container
+ * at the address stored in the device.
+ */
+void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
+{
+    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+    const uint64_t addr = mdc->get_addr(md);
+    MemoryRegion *mr;
+    hwaddr container_offset;
+
+    /*
+     * We expect that a previous call to memory_device_pre_plug() succeeded, so
+     * it can't fail at this point.
+     */
+    mr = mdc->get_memory_region(md, &error_abort);
+    g_assert(ms->device_memory);
+
+    /* The container is addressed relative to the device memory base */
+    container_offset = addr - ms->device_memory->base;
+    memory_region_add_subregion(&ms->device_memory->mr, container_offset, mr);
+    trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr);
+}
+
+/*
+ * Remove the memory region of @md from the machine's device memory
+ * container.
+ */
+void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
+{
+    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+    MemoryRegion *mr;
+    const char *id;
+
+    /*
+     * We expect that a previous call to memory_device_pre_plug() succeeded, so
+     * it can't fail at this point.
+     */
+    mr = mdc->get_memory_region(md, &error_abort);
+    g_assert(ms->device_memory);
+
+    memory_region_del_subregion(&ms->device_memory->mr, mr);
+
+    id = DEVICE(md)->id;
+    trace_memory_device_unplug(id ? id : "", mdc->get_addr(md));
+}
+
+/*
+ * Return the size of the memory region of @md, or 0 when the class callback
+ * does not provide a region (the callback reports errors through @errp).
+ */
+uint64_t memory_device_get_region_size(const MemoryDeviceState *md,
+                                       Error **errp)
+{
+    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
+    /* dropping const here is fine as we don't touch the memory region */
+    MemoryRegion *mr = mdc->get_memory_region((MemoryDeviceState *)md, errp);
+
+    return mr ? memory_region_size(mr) : 0;
+}
+
+/* QOM description of the memory device interface type. */
+static const TypeInfo memory_device_info = {
+ .name = TYPE_MEMORY_DEVICE,
+ .parent = TYPE_INTERFACE,
+ .class_size = sizeof(MemoryDeviceClass),
+};
+
+/* Register the memory device interface type. */
+static void memory_device_register_types(void)
+{
+ type_register_static(&memory_device_info);
+}
+
+type_init(memory_device_register_types)
diff --git a/hw/mem/meson.build b/hw/mem/meson.build
new file mode 100644
index 00000000..609b2b36
--- /dev/null
+++ b/hw/mem/meson.build
@@ -0,0 +1,10 @@
+mem_ss = ss.source_set()
+mem_ss.add(files('memory-device.c'))
+mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c'))
+mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c'))
+mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c'))
+mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c'))
+
+softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss)
+
+softmmu_ss.add(when: 'CONFIG_SPARSE_MEM', if_true: files('sparse-mem.c'))
diff --git a/hw/mem/npcm7xx_mc.c b/hw/mem/npcm7xx_mc.c
new file mode 100644
index 00000000..abc5af56
--- /dev/null
+++ b/hw/mem/npcm7xx_mc.c
@@ -0,0 +1,84 @@
+/*
+ * Nuvoton NPCM7xx Memory Controller stub
+ *
+ * Copyright 2020 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/mem/npcm7xx_mc.h"
+#include "qapi/error.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "qemu/units.h"
+
+#define NPCM7XX_MC_REGS_SIZE (4 * KiB)
+
+/*
+ * MMIO read handler of the memory controller stub.  Offset 0 returns 0x100;
+ * every other offset logs an "unimplemented" message and reads as 0.
+ */
+static uint64_t npcm7xx_mc_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    if (addr != 0) {
+        qemu_log_mask(LOG_UNIMP, "%s: mostly unimplemented\n", __func__);
+        return 0;
+    }
+
+    /*
+     * If bits 8..11 @ offset 0 are not zero, the boot block thinks the memory
+     * controller has already been initialized and will skip DDR training.
+     */
+    return 0x100;
+}
+
+/*
+ * MMIO write handler of the memory controller stub: the written value is
+ * discarded and an "unimplemented" message is logged.
+ */
+static void npcm7xx_mc_write(void *opaque, hwaddr addr, uint64_t v,
+ unsigned int size)
+{
+ qemu_log_mask(LOG_UNIMP, "%s: mostly unimplemented\n", __func__);
+}
+
+/*
+ * MMIO access description for the stub: little-endian registers that accept
+ * only aligned 4-byte accesses.
+ */
+static const MemoryRegionOps npcm7xx_mc_ops = {
+ .read = npcm7xx_mc_read,
+ .write = npcm7xx_mc_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ .unaligned = false,
+ },
+};
+
+/*
+ * Realize callback: create the NPCM7XX_MC_REGS_SIZE byte register region
+ * backed by npcm7xx_mc_ops and expose it as a sysbus MMIO region.
+ */
+static void npcm7xx_mc_realize(DeviceState *dev, Error **errp)
+{
+    NPCM7xxMCState *s = NPCM7XX_MC(dev);
+    MemoryRegion *regs = &s->mmio;
+
+    memory_region_init_io(regs, OBJECT(s), &npcm7xx_mc_ops, s, "regs",
+                          NPCM7XX_MC_REGS_SIZE);
+    sysbus_init_mmio(SYS_BUS_DEVICE(s), regs);
+}
+
+/* Class initializer: install the realize callback and the description. */
+static void npcm7xx_mc_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->realize = npcm7xx_mc_realize;
+    dev_class->desc = "NPCM7xx Memory Controller stub";
+}
+
+/* QOM types provided by this file: the memory controller stub sysbus device. */
+static const TypeInfo npcm7xx_mc_types[] = {
+ {
+ .name = TYPE_NPCM7XX_MC,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(NPCM7xxMCState),
+ .class_init = npcm7xx_mc_class_init,
+ },
+};
+DEFINE_TYPES(npcm7xx_mc_types);
diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
new file mode 100644
index 00000000..31080c22
--- /dev/null
+++ b/hw/mem/nvdimm.c
@@ -0,0 +1,282 @@
+/*
+ * Non-Volatile Dual In-line Memory Module Virtualization Implementation
+ *
+ * Copyright(C) 2015 Intel Corporation.
+ *
+ * Author:
+ * Xiao Guangrong <guangrong.xiao@linux.intel.com>
+ *
+ * Currently, it only supports PMEM Virtualization.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/module.h"
+#include "qemu/pmem.h"
+#include "qapi/error.h"
+#include "qapi/visitor.h"
+#include "hw/mem/nvdimm.h"
+#include "hw/qdev-properties.h"
+#include "hw/mem/memory-device.h"
+#include "sysemu/hostmem.h"
+
+/* Property getter: hand the current label_size to the visitor. */
+static void nvdimm_get_label_size(Object *obj, Visitor *v, const char *name,
+                                  void *opaque, Error **errp)
+{
+    uint64_t label_size = NVDIMM(obj)->label_size;
+
+    visit_type_size(v, name, &label_size, errp);
+}
+
+/*
+ * Property setter for the label size.
+ *
+ * The value can no longer be changed once nvdimm_mr exists, and values
+ * below MIN_NAMESPACE_LABEL_SIZE are rejected.
+ */
+static void nvdimm_set_label_size(Object *obj, Visitor *v, const char *name,
+                                  void *opaque, Error **errp)
+{
+    NVDIMMDevice *nvdimm = NVDIMM(obj);
+    uint64_t requested;
+
+    if (nvdimm->nvdimm_mr) {
+        error_setg(errp, "cannot change property value");
+        return;
+    }
+
+    if (!visit_type_size(v, name, &requested, errp)) {
+        return;
+    }
+
+    if (requested >= MIN_NAMESPACE_LABEL_SIZE) {
+        nvdimm->label_size = requested;
+        return;
+    }
+
+    error_setg(errp, "Property '%s.%s' (0x%" PRIx64 ") is required"
+               " at least 0x%lx", object_get_typename(obj), name, requested,
+               MIN_NAMESPACE_LABEL_SIZE);
+}
+
+/* Property getter: format the device UUID as a string for the visitor. */
+static void nvdimm_get_uuid(Object *obj, Visitor *v, const char *name,
+                            void *opaque, Error **errp)
+{
+    char *uuid_str = qemu_uuid_unparse_strdup(&NVDIMM(obj)->uuid);
+
+    visit_type_str(v, name, &uuid_str, errp);
+    g_free(uuid_str);
+}
+
+
+/*
+ * Property setter: read a string from the visitor and parse it into the
+ * device UUID, reporting an error if the string does not parse.
+ */
+static void nvdimm_set_uuid(Object *obj, Visitor *v, const char *name,
+                            void *opaque, Error **errp)
+{
+    NVDIMMDevice *nvdimm = NVDIMM(obj);
+    char *uuid_str;
+    int rc;
+
+    if (!visit_type_str(v, name, &uuid_str, errp)) {
+        return;
+    }
+
+    rc = qemu_uuid_parse(uuid_str, &nvdimm->uuid);
+    if (rc != 0) {
+        error_setg(errp, "Property '%s.%s' has invalid value",
+                   object_get_typename(obj), name);
+    }
+
+    /* The visitor handed us ownership of the string. */
+    g_free(uuid_str);
+}
+
+
+/* Instance initializer: register the label-size and UUID properties. */
+static void nvdimm_init(Object *obj)
+{
+    /* Label size: accessed through the size visitor. */
+    object_property_add(obj, NVDIMM_LABEL_SIZE_PROP, "int",
+                        nvdimm_get_label_size, nvdimm_set_label_size,
+                        NULL, NULL);
+
+    /* UUID: exchanged as a string. */
+    object_property_add(obj, NVDIMM_UUID_PROP, "QemuUUID",
+                        nvdimm_get_uuid, nvdimm_set_uuid,
+                        NULL, NULL);
+}
+
+/* Instance finalizer: release the alias region allocated with g_new(). */
+static void nvdimm_finalize(Object *obj)
+{
+    g_free(NVDIMM(obj)->nvdimm_mr);
+}
+
+/*
+ * Create nvdimm->nvdimm_mr as an alias over the PMEM part of the backend.
+ *
+ * The backend region is split in two: the last label_size bytes hold the
+ * label area (label_data points at its start); the bytes before it, rounded
+ * down to the backend alignment, are exposed through nvdimm_mr.
+ *
+ * On failure @errp is set and neither nvdimm_mr nor label_data is modified.
+ * Must only be called while nvdimm_mr is still NULL.
+ */
+static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
+{
+    PCDIMMDevice *dimm = PC_DIMM(nvdimm);
+    uint64_t align, pmem_size, size;
+    MemoryRegion *mr;
+
+    g_assert(!nvdimm->nvdimm_mr);
+
+    if (!dimm->hostmem) {
+        error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property must be set");
+        return;
+    }
+
+    mr = host_memory_backend_get_memory(dimm->hostmem);
+    align = memory_region_get_alignment(mr);
+    size = memory_region_size(mr);
+
+    /*
+     * Validate before subtracting: if the label area is not smaller than
+     * the backend, "size - label_size" would wrap around.
+     */
+    pmem_size = 0;
+    if (size > nvdimm->label_size) {
+        pmem_size = QEMU_ALIGN_DOWN(size - nvdimm->label_size, align);
+    }
+
+    if (!pmem_size) {
+        HostMemoryBackend *hostmem = dimm->hostmem;
+
+        error_setg(errp, "the size of memdev %s (0x%" PRIx64 ") is too "
+                   "small to contain nvdimm label (0x%" PRIx64 ") and "
+                   "aligned PMEM (0x%" PRIx64 ")",
+                   object_get_canonical_path_component(OBJECT(hostmem)),
+                   memory_region_size(mr), nvdimm->label_size, align);
+        return;
+    }
+
+    if (!nvdimm->unarmed && memory_region_is_rom(mr)) {
+        HostMemoryBackend *hostmem = dimm->hostmem;
+
+        error_setg(errp, "'unarmed' property must be 'on' since memdev %s "
+                   "is read-only",
+                   object_get_canonical_path_component(OBJECT(hostmem)));
+        return;
+    }
+
+    /* All checks passed: only now publish pointers into the backend. */
+    nvdimm->label_data = memory_region_get_ram_ptr(mr) +
+                         (size - nvdimm->label_size);
+
+    nvdimm->nvdimm_mr = g_new(MemoryRegion, 1);
+    memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm),
+                             "nvdimm-memory", mr, 0, pmem_size);
+    memory_region_set_nonvolatile(nvdimm->nvdimm_mr, true);
+    nvdimm->nvdimm_mr->align = align;
+}
+
+/*
+ * MemoryDeviceClass::get_memory_region hook.
+ *
+ * Returns the alias region, creating it on first use. Returns NULL and
+ * sets @errp if the region cannot be prepared.
+ */
+static MemoryRegion *nvdimm_md_get_memory_region(MemoryDeviceState *md,
+                                                 Error **errp)
+{
+    NVDIMMDevice *nvdimm = NVDIMM(md);
+    Error *err = NULL;
+
+    if (nvdimm->nvdimm_mr) {
+        return nvdimm->nvdimm_mr;
+    }
+
+    nvdimm_prepare_memory_region(nvdimm, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return NULL;
+    }
+
+    return nvdimm->nvdimm_mr;
+}
+
+/*
+ * PCDIMMDeviceClass::realize hook.
+ *
+ * Prepares the alias memory region if that has not happened yet, then
+ * chains to the NVDIMMClass realize hook when one is installed. If the
+ * region cannot be prepared, the error is reported through @errp and the
+ * class hook is not called, so @errp is never handed on already set.
+ */
+static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp)
+{
+    NVDIMMDevice *nvdimm = NVDIMM(dimm);
+    NVDIMMClass *ndc = NVDIMM_GET_CLASS(nvdimm);
+    Error *local_err = NULL;
+
+    if (!nvdimm->nvdimm_mr) {
+        nvdimm_prepare_memory_region(nvdimm, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
+    if (ndc->realize) {
+        ndc->realize(nvdimm, errp);
+    }
+}
+
+/* PCDIMMDeviceClass::unrealize hook: chain to the NVDIMMClass hook if set. */
+static void nvdimm_unrealize(PCDIMMDevice *dimm)
+{
+    NVDIMMDevice *nvdimm = NVDIMM(dimm);
+    NVDIMMClass *ndc = NVDIMM_GET_CLASS(nvdimm);
+
+    if (!ndc->unrealize) {
+        return;
+    }
+
+    ndc->unrealize(nvdimm);
+}
+
+/*
+ * Sanity check for label accesses. Callers are expected to have validated
+ * their parameters already; this only asserts that [offset, offset + size)
+ * is a non-empty range that does not wrap and lies within the label area.
+ */
+static void nvdimm_validate_rw_label_data(NVDIMMDevice *nvdimm, uint64_t size,
+                                          uint64_t offset)
+{
+    assert((offset + size > offset) && (size + offset <= nvdimm->label_size));
+}
+
+/* Copy @size bytes starting at @offset of the label area into @buf. */
+static void nvdimm_read_label_data(NVDIMMDevice *nvdimm, void *buf,
+                                   uint64_t size, uint64_t offset)
+{
+    /* Asserts that the requested range fits inside the label area. */
+    nvdimm_validate_rw_label_data(nvdimm, size, offset);
+
+    memcpy(buf, nvdimm->label_data + offset, size);
+}
+
+/*
+ * Copy @size bytes from @buf into the label area at @offset and mark the
+ * corresponding range of the backend memory region dirty.
+ *
+ * When the backend's "pmem" property is true the copy goes through
+ * pmem_memcpy_persist(); otherwise a plain memcpy() is used.
+ */
+static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf,
+                                    uint64_t size, uint64_t offset)
+{
+    PCDIMMDevice *dimm = PC_DIMM(nvdimm);
+    bool backend_is_pmem = object_property_get_bool(OBJECT(dimm->hostmem),
+                                                    "pmem", NULL);
+    MemoryRegion *backend_mr;
+    uint64_t label_start;
+
+    nvdimm_validate_rw_label_data(nvdimm, size, offset);
+
+    if (backend_is_pmem) {
+        pmem_memcpy_persist(nvdimm->label_data + offset, buf, size);
+    } else {
+        memcpy(nvdimm->label_data + offset, buf, size);
+    }
+
+    /* The label area occupies the last label_size bytes of the backend. */
+    backend_mr = host_memory_backend_get_memory(dimm->hostmem);
+    label_start = memory_region_size(backend_mr) - nvdimm->label_size;
+    memory_region_set_dirty(backend_mr, label_start + offset, size);
+}
+
+static Property nvdimm_properties[] = { /* qdev properties installed by nvdimm_class_init() */
+ DEFINE_PROP_BOOL(NVDIMM_UNARMED_PROP, NVDIMMDevice, unarmed, false), /* backs NVDIMMDevice.unarmed; default false */
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initializer: hook the NVDIMM implementation into the PC-DIMM,
+ * memory-device and device class layers.
+ */
+static void nvdimm_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    PCDIMMDeviceClass *ddc = PC_DIMM_CLASS(oc);
+    MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(oc);
+    NVDIMMClass *nvc = NVDIMM_CLASS(oc);
+
+    /* Generic device layer. */
+    device_class_set_props(dc, nvdimm_properties);
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
+
+    /* PC-DIMM layer. */
+    ddc->realize = nvdimm_realize;
+    ddc->unrealize = nvdimm_unrealize;
+
+    /* Memory-device layer. */
+    mdc->get_memory_region = nvdimm_md_get_memory_region;
+
+    /* Label area accessors. */
+    nvc->read_label_data = nvdimm_read_label_data;
+    nvc->write_label_data = nvdimm_write_label_data;
+}
+
+/* QOM type description for TYPE_NVDIMM, a child type of TYPE_PC_DIMM. */
+static const TypeInfo nvdimm_info = {
+    .name = TYPE_NVDIMM,
+    .parent = TYPE_PC_DIMM,
+    .instance_size = sizeof(NVDIMMDevice),
+    .instance_init = nvdimm_init,
+    .instance_finalize = nvdimm_finalize,
+    .class_size = sizeof(NVDIMMClass),
+    .class_init = nvdimm_class_init,
+};
+
+/* Register the NVDIMM type; invoked through type_init() below. */
+static void nvdimm_register_types(void)
+{
+    type_register_static(&nvdimm_info);
+}
+
+type_init(nvdimm_register_types)
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
new file mode 100644
index 00000000..f27e1a11
--- /dev/null
+++ b/hw/mem/pc-dimm.c
@@ -0,0 +1,312 @@
+/*
+ * Dimm device for Memory Hotplug
+ *
+ * Copyright ProfitBricks GmbH 2012
+ * Copyright (C) 2014 Red Hat Inc
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "hw/boards.h"
+#include "hw/mem/pc-dimm.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
+#include "hw/mem/nvdimm.h"
+#include "hw/mem/memory-device.h"
+#include "qapi/error.h"
+#include "qapi/visitor.h"
+#include "qemu/module.h"
+#include "sysemu/hostmem.h"
+#include "sysemu/numa.h"
+#include "trace.h"
+
+static int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp);
+
+/*
+ * Return the memory region of the DIMM's host memory backend, or NULL
+ * with @errp set when no backend has been assigned.
+ */
+static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm, Error **errp)
+{
+    if (dimm->hostmem) {
+        return host_memory_backend_get_memory(dimm->hostmem);
+    }
+
+    error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property must be set");
+    return NULL;
+}
+
+/*
+ * Pre-plug handling for a DIMM.
+ *
+ * Validates the requested slot (or picks a free one when the slot is
+ * PC_DIMM_UNASSIGNED_SLOT), stores the chosen slot back into the slot
+ * property, and then runs the generic memory-device pre-plug step.
+ * Errors are reported through @errp.
+ */
+void pc_dimm_pre_plug(PCDIMMDevice *dimm, MachineState *machine,
+                      const uint64_t *legacy_align, Error **errp)
+{
+    Error *err = NULL;
+    int requested;
+    int slot;
+
+    requested = object_property_get_int(OBJECT(dimm), PC_DIMM_SLOT_PROP,
+                                        &error_abort);
+    if (requested != PC_DIMM_UNASSIGNED_SLOT &&
+        (requested < 0 || requested >= machine->ram_slots)) {
+        error_setg(errp,
+                   "invalid slot number %d, valid range is [0-%" PRIu64 "]",
+                   requested, machine->ram_slots - 1);
+        return;
+    }
+
+    /* A NULL hint asks for the first free slot. */
+    slot = pc_dimm_get_free_slot(requested == PC_DIMM_UNASSIGNED_SLOT ?
+                                 NULL : &requested,
+                                 machine->ram_slots, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    object_property_set_int(OBJECT(dimm), PC_DIMM_SLOT_PROP, slot,
+                            &error_abort);
+    trace_mhp_pc_dimm_assigned_slot(slot);
+
+    memory_device_pre_plug(MEMORY_DEVICE(dimm), machine, legacy_align,
+                           errp);
+}
+
+/*
+ * Plug a DIMM: run the generic memory-device plug step and register the
+ * backend's memory region with vmstate.
+ */
+void pc_dimm_plug(PCDIMMDevice *dimm, MachineState *machine)
+{
+    MemoryRegion *backend_mr;
+
+    /* Aborts if no backend is set. */
+    backend_mr = pc_dimm_get_memory_region(dimm, &error_abort);
+
+    memory_device_plug(MEMORY_DEVICE(dimm), machine);
+    vmstate_register_ram(backend_mr, DEVICE(dimm));
+}
+
+/*
+ * Unplug a DIMM: run the generic memory-device unplug step and drop the
+ * vmstate registration of the backend's memory region.
+ */
+void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine)
+{
+    MemoryRegion *backend_mr;
+
+    /* Aborts if no backend is set. */
+    backend_mr = pc_dimm_get_memory_region(dimm, &error_abort);
+
+    memory_device_unplug(MEMORY_DEVICE(dimm), machine);
+    vmstate_unregister_ram(backend_mr, DEVICE(dimm));
+}
+
+/*
+ * object_child_foreach() callback: set the bit for the slot of every
+ * realized PC-DIMM found at or below @obj. @opaque is the slot bitmap.
+ * Always returns 0.
+ */
+static int pc_dimm_slot2bitmap(Object *obj, void *opaque)
+{
+    unsigned long *used_slots = opaque;
+
+    /* count only realized DIMMs */
+    if (object_dynamic_cast(obj, TYPE_PC_DIMM) && DEVICE(obj)->realized) {
+        set_bit(PC_DIMM(obj)->slot, used_slots);
+    }
+
+    /* Recurse into the children of this object. */
+    object_child_foreach(obj, pc_dimm_slot2bitmap, opaque);
+    return 0;
+}
+
+/*
+ * Pick a DIMM slot.
+ *
+ * With @hint set, the hinted slot is returned if it is below @max_slots and
+ * not used by a realized DIMM; otherwise @errp is set and 0 is returned.
+ * With @hint NULL, the first unused slot is returned; if all slots are in
+ * use @errp is set and @max_slots is returned.
+ * If @max_slots is not positive, @errp is set and 0 is returned.
+ */
+static int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp)
+{
+    unsigned long *used_slots;
+    int slot = 0;
+
+    if (max_slots <= 0) {
+        error_setg(errp, "no slots where allocated, please specify "
+                   "the 'slots' option");
+        return slot;
+    }
+
+    /* Collect the slots of all realized DIMMs below the machine object. */
+    used_slots = bitmap_new(max_slots);
+    object_child_foreach(qdev_get_machine(), pc_dimm_slot2bitmap, used_slots);
+
+    if (!hint) {
+        /* search for free slot */
+        slot = find_first_zero_bit(used_slots, max_slots);
+        if (slot == max_slots) {
+            error_setg(errp, "no free slots available");
+        }
+    } else if (*hint >= max_slots) {
+        error_setg(errp, "invalid slot# %d, should be less than %d",
+                   *hint, max_slots);
+    } else if (test_bit(*hint, used_slots)) {
+        error_setg(errp, "slot %d is busy", *hint);
+    } else {
+        /* requested slot is not occupied */
+        slot = *hint;
+    }
+
+    g_free(used_slots);
+    return slot;
+}
+
+static Property pc_dimm_properties[] = { /* qdev properties installed by pc_dimm_class_init() */
+ DEFINE_PROP_UINT64(PC_DIMM_ADDR_PROP, PCDIMMDevice, addr, 0), /* backs PCDIMMDevice.addr; default 0 */
+ DEFINE_PROP_UINT32(PC_DIMM_NODE_PROP, PCDIMMDevice, node, 0), /* backs PCDIMMDevice.node; range-checked in pc_dimm_realize() */
+ DEFINE_PROP_INT32(PC_DIMM_SLOT_PROP, PCDIMMDevice, slot,
+ PC_DIMM_UNASSIGNED_SLOT), /* default means "unassigned"; pc_dimm_pre_plug() then picks a free slot */
+ DEFINE_PROP_LINK(PC_DIMM_MEMDEV_PROP, PCDIMMDevice, hostmem,
+ TYPE_MEMORY_BACKEND, HostMemoryBackend *), /* link to the host memory backend; must be set before realize */
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Getter for the size property: reports the memory device's region size,
+ * or propagates the error if the size cannot be determined.
+ */
+static void pc_dimm_get_size(Object *obj, Visitor *v, const char *name,
+                             void *opaque, Error **errp)
+{
+    Error *err = NULL;
+    uint64_t region_size;
+
+    region_size = memory_device_get_region_size(MEMORY_DEVICE(obj), &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    visit_type_uint64(v, name, &region_size, errp);
+}
+
+/* Instance initializer: add the size property, which has a getter only. */
+static void pc_dimm_init(Object *obj)
+{
+    object_property_add(obj, PC_DIMM_SIZE_PROP, "uint64",
+                        pc_dimm_get_size, NULL,
+                        NULL, NULL);
+}
+
+/*
+ * DeviceClass::realize for PC-DIMM devices.
+ *
+ * Checks the NUMA node against the machine configuration, requires an
+ * unused host memory backend, runs the subclass realize hook if present
+ * and finally marks the backend as mapped.
+ *
+ * The backend is only marked mapped when every step succeeded, so a failed
+ * realize does not leave the memdev flagged as busy.
+ */
+static void pc_dimm_realize(DeviceState *dev, Error **errp)
+{
+    PCDIMMDevice *dimm = PC_DIMM(dev);
+    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
+    MachineState *ms = MACHINE(qdev_get_machine());
+    Error *local_err = NULL;
+
+    if (ms->numa_state) {
+        int nb_numa_nodes = ms->numa_state->num_nodes;
+
+        if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) ||
+            (!nb_numa_nodes && dimm->node)) {
+            error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %"
+                       PRIu32 "' which exceeds the number of numa nodes: %d",
+                       dimm->node, nb_numa_nodes ? nb_numa_nodes : 1);
+            return;
+        }
+    } else if (dimm->node > 0) {
+        error_setg(errp, "machine doesn't support NUMA");
+        return;
+    }
+
+    if (!dimm->hostmem) {
+        error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set");
+        return;
+    } else if (host_memory_backend_is_mapped(dimm->hostmem)) {
+        error_setg(errp, "can't use already busy memdev: %s",
+                   object_get_canonical_path_component(OBJECT(dimm->hostmem)));
+        return;
+    }
+
+    if (ddc->realize) {
+        ddc->realize(dimm, &local_err);
+        if (local_err) {
+            /* Do not claim the backend for a device that failed to realize. */
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
+    host_memory_backend_set_mapped(dimm->hostmem, true);
+}
+
+/*
+ * DeviceClass::unrealize for PC-DIMM devices: run the subclass hook if
+ * present, then mark the backend as no longer mapped.
+ */
+static void pc_dimm_unrealize(DeviceState *dev)
+{
+    PCDIMMDevice *dimm = PC_DIMM(dev);
+    PCDIMMDeviceClass *klass = PC_DIMM_GET_CLASS(dimm);
+
+    if (klass->unrealize) {
+        klass->unrealize(dimm);
+    }
+
+    host_memory_backend_set_mapped(dimm->hostmem, false);
+}
+
+/* MemoryDeviceClass::get_addr hook: read the address property. */
+static uint64_t pc_dimm_md_get_addr(const MemoryDeviceState *md)
+{
+    Object *obj = OBJECT(md);
+
+    return object_property_get_uint(obj, PC_DIMM_ADDR_PROP, &error_abort);
+}
+
+/* MemoryDeviceClass::set_addr hook: write the address property. */
+static void pc_dimm_md_set_addr(MemoryDeviceState *md, uint64_t addr,
+                                Error **errp)
+{
+    Object *obj = OBJECT(md);
+
+    object_property_set_uint(obj, PC_DIMM_ADDR_PROP, addr, errp);
+}
+
+/* MemoryDeviceClass::get_memory_region hook: forward to the DIMM helper. */
+static MemoryRegion *pc_dimm_md_get_memory_region(MemoryDeviceState *md,
+                                                  Error **errp)
+{
+    PCDIMMDevice *dimm = PC_DIMM(md);
+
+    return pc_dimm_get_memory_region(dimm, errp);
+}
+
+/*
+ * MemoryDeviceClass::fill_device_info hook.
+ *
+ * Allocates a PCDIMMDeviceInfo describing @md and stores it in @info,
+ * tagged as NVDIMM when the device is a TYPE_NVDIMM and as DIMM otherwise.
+ */
+static void pc_dimm_md_fill_device_info(const MemoryDeviceState *md,
+                                        MemoryDeviceInfo *info)
+{
+    PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1);
+    const DeviceClass *dc = DEVICE_GET_CLASS(md);
+    const PCDIMMDevice *dimm = PC_DIMM(md);
+    const DeviceState *dev = DEVICE(md);
+
+    /* The id is optional. */
+    if (dev->id) {
+        di->has_id = true;
+        di->id = g_strdup(dev->id);
+    }
+
+    /* Hotplug state. */
+    di->hotplugged = dev->hotplugged;
+    di->hotpluggable = dc->hotpluggable;
+
+    /* Placement and size. */
+    di->addr = dimm->addr;
+    di->slot = dimm->slot;
+    di->node = dimm->node;
+    di->size = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP,
+                                        NULL);
+    di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem));
+
+    if (!object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
+        info->u.dimm.data = di;
+        info->type = MEMORY_DEVICE_INFO_KIND_DIMM;
+        return;
+    }
+
+    info->u.nvdimm.data = di;
+    info->type = MEMORY_DEVICE_INFO_KIND_NVDIMM;
+}
+
+/* Class initializer: install the device and memory-device callbacks. */
+static void pc_dimm_class_init(ObjectClass *oc, void *data)
+{
+    MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(oc);
+    DeviceClass *dc = DEVICE_CLASS(oc);
+
+    /* Generic device layer. */
+    dc->desc = "DIMM memory module";
+    dc->realize = pc_dimm_realize;
+    dc->unrealize = pc_dimm_unrealize;
+    device_class_set_props(dc, pc_dimm_properties);
+
+    /* Memory-device interface. */
+    mdc->get_addr = pc_dimm_md_get_addr;
+    mdc->set_addr = pc_dimm_md_set_addr;
+    mdc->get_memory_region = pc_dimm_md_get_memory_region;
+    mdc->fill_device_info = pc_dimm_md_fill_device_info;
+    /* for a dimm plugged_size == region_size */
+    mdc->get_plugged_size = memory_device_get_region_size;
+}
+
+/*
+ * QOM type description for TYPE_PC_DIMM: a TYPE_DEVICE that implements the
+ * TYPE_MEMORY_DEVICE interface.
+ */
+static const TypeInfo pc_dimm_info = {
+    .name = TYPE_PC_DIMM,
+    .parent = TYPE_DEVICE,
+    .class_size = sizeof(PCDIMMDeviceClass),
+    .class_init = pc_dimm_class_init,
+    .instance_size = sizeof(PCDIMMDevice),
+    .instance_init = pc_dimm_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_MEMORY_DEVICE },
+        { }
+    },
+};
+
+/* Register the PC-DIMM type; invoked through type_init() below. */
+static void pc_dimm_register_types(void)
+{
+    type_register_static(&pc_dimm_info);
+}
+
+type_init(pc_dimm_register_types)
diff --git a/hw/mem/sparse-mem.c b/hw/mem/sparse-mem.c
new file mode 100644
index 00000000..e6640eb8
--- /dev/null
+++ b/hw/mem/sparse-mem.c
@@ -0,0 +1,150 @@
+/*
+ * A sparse memory device. Useful for fuzzing
+ *
+ * Copyright Red Hat Inc., 2021
+ *
+ * Authors:
+ * Alexander Bulekov <alxndr@bu.edu>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/sysbus.h"
+#include "qapi/error.h"
+#include "qemu/units.h"
+#include "sysemu/qtest.h"
+#include "hw/mem/sparse-mem.h"
+
+#define SPARSE_MEM(obj) OBJECT_CHECK(SparseMemState, (obj), TYPE_SPARSE_MEM)
+#define SPARSE_BLOCK_SIZE 0x1000
+
+typedef struct SparseMemState { /* per-device state of the sparse memory device */
+ SysBusDevice parent_obj;
+ MemoryRegion mmio; /* I/O region created in sparse_mem_realize(), backed by sparse_mem_ops */
+ uint64_t baseaddr; /* "baseaddr" property */
+ uint64_t length; /* "length" property; used as the size of mmio */
+ uint64_t size_used; /* bytes of block storage allocated so far by sparse_mem_write() */
+ uint64_t maxsize; /* "maxsize" property; bounds size_used */
+ GHashTable *mapped; /* block number (addr / SPARSE_BLOCK_SIZE) -> sparse_mem_block */
+} SparseMemState;
+
+typedef struct sparse_mem_block { /* one lazily allocated block of backing storage */
+ uint8_t data[SPARSE_BLOCK_SIZE]; /* zero-filled on allocation (g_new0 in sparse_mem_write()) */
+} sparse_mem_block;
+
+/*
+ * MMIO read handler: return @size bytes from the block containing @addr,
+ * or 0 if no block has been allocated for that address.
+ */
+static uint64_t sparse_mem_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    SparseMemState *s = opaque;
+    size_t pfn = addr / SPARSE_BLOCK_SIZE;
+    size_t offset = addr % SPARSE_BLOCK_SIZE;
+    sparse_mem_block *block = g_hash_table_lookup(s->mapped, (void *)pfn);
+    uint64_t value = 0;
+
+    if (!block) {
+        /* Never-written blocks read as zero. */
+        return value;
+    }
+
+    /* The access must not cross the end of the block. */
+    assert(offset + size <= sizeof(block->data));
+    memcpy(&value, block->data + offset, size);
+    return value;
+}
+
+/*
+ * MMIO write handler.
+ *
+ * A block is allocated on the first non-zero write to it, provided the
+ * maxsize budget still has room. Writes to addresses without a block
+ * (zero writes to untouched blocks, or budget exhausted) are dropped.
+ */
+static void sparse_mem_write(void *opaque, hwaddr addr, uint64_t v,
+                             unsigned int size)
+{
+    SparseMemState *s = opaque;
+    size_t pfn = addr / SPARSE_BLOCK_SIZE;
+    size_t offset = addr % SPARSE_BLOCK_SIZE;
+    sparse_mem_block *block = g_hash_table_lookup(s->mapped, (void *)pfn);
+
+    if (!block && s->size_used + SPARSE_BLOCK_SIZE < s->maxsize && v) {
+        block = g_new0(sparse_mem_block, 1);
+        g_hash_table_insert(s->mapped, (void *)pfn, block);
+        s->size_used += sizeof(block->data);
+    }
+
+    if (!block) {
+        return;
+    }
+
+    /* The access must not cross the end of the block. */
+    assert(offset + size <= sizeof(block->data));
+    memcpy(block->data + offset, &v, size);
+}
+
+/* Access rules for the sparse region: aligned accesses of 1 to 8 bytes. */
+static const MemoryRegionOps sparse_mem_ops = {
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .unaligned = false,
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+    .read = sparse_mem_read,
+    .write = sparse_mem_write,
+};
+
+static Property sparse_mem_properties[] = { /* qdev properties installed by sparse_mem_class_init() */
+ /* The base address of the memory */
+ DEFINE_PROP_UINT64("baseaddr", SparseMemState, baseaddr, 0x0),
+ /* The length of the sparse memory region */
+ DEFINE_PROP_UINT64("length", SparseMemState, length, UINT64_MAX),
+ /* Max amount of actual memory that can be used to back the sparse memory */
+ DEFINE_PROP_UINT64("maxsize", SparseMemState, maxsize, 10 * MiB), /* default budget: 10 MiB */
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Create and realize a sparse memory device of @length bytes and map it at
+ * @addr as an overlapping region with priority -10000. Realize failures
+ * are fatal. Returns the device's MMIO region.
+ */
+MemoryRegion *sparse_mem_init(uint64_t addr, uint64_t length)
+{
+    DeviceState *dev = qdev_new(TYPE_SPARSE_MEM);
+
+    qdev_prop_set_uint64(dev, "baseaddr", addr);
+    qdev_prop_set_uint64(dev, "length", length);
+
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+    sysbus_mmio_map_overlap(SYS_BUS_DEVICE(dev), 0, addr, -10000);
+
+    return &SPARSE_MEM(dev)->mmio;
+}
+
+/*
+ * Realize: refuse to run outside of qtest, then create the block table and
+ * the MMIO region and expose the region on the sysbus.
+ */
+static void sparse_mem_realize(DeviceState *dev, Error **errp)
+{
+    SparseMemState *s = SPARSE_MEM(dev);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+
+    if (!qtest_enabled()) {
+        error_setg(errp, "sparse_mem device should only be used "
+                         "for testing with QTest");
+        return;
+    }
+
+    /* The region must be non-empty and must not wrap the address space. */
+    assert(s->baseaddr + s->length > s->baseaddr);
+
+    /* NULL hash/equal functions: keys are compared as plain pointers. */
+    s->mapped = g_hash_table_new(NULL, NULL);
+
+    memory_region_init_io(&s->mmio, OBJECT(s), &sparse_mem_ops, s,
+                          "sparse-mem", s->length);
+    sysbus_init_mmio(sbd, &s->mmio);
+}
+
+/* Class initializer: install properties, description and realize hook. */
+static void sparse_mem_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(klass);
+
+    device_class->desc = "Sparse Memory Device";
+    device_class->realize = sparse_mem_realize;
+
+    device_class_set_props(device_class, sparse_mem_properties);
+}
+
+/* QOM type table for this file; registered through DEFINE_TYPES(). */
+static const TypeInfo sparse_mem_types[] = {
+    {
+        .parent = TYPE_SYS_BUS_DEVICE,
+        .name = TYPE_SPARSE_MEM,
+        .class_init = sparse_mem_class_init,
+        .instance_size = sizeof(SparseMemState),
+    },
+};
+DEFINE_TYPES(sparse_mem_types);
diff --git a/hw/mem/trace-events b/hw/mem/trace-events
new file mode 100644
index 00000000..8b6b02b5
--- /dev/null
+++ b/hw/mem/trace-events
@@ -0,0 +1,8 @@
+# See docs/devel/tracing.rst for syntax documentation.
+
+# pc-dimm.c
+mhp_pc_dimm_assigned_slot(int slot) "%d"
+# memory-device.c
+memory_device_pre_plug(const char *id, uint64_t addr) "id=%s addr=0x%"PRIx64
+memory_device_plug(const char *id, uint64_t addr) "id=%s addr=0x%"PRIx64
+memory_device_unplug(const char *id, uint64_t addr) "id=%s addr=0x%"PRIx64
diff --git a/hw/mem/trace.h b/hw/mem/trace.h
new file mode 100644
index 00000000..2f2c9454
--- /dev/null
+++ b/hw/mem/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-hw_mem.h"