diff options
Diffstat (limited to 'hw/mem')
-rw-r--r-- | hw/mem/Kconfig | 18 | ||||
-rw-r--r-- | hw/mem/cxl_type3.c | 647 | ||||
-rw-r--r-- | hw/mem/memory-device.c | 346 | ||||
-rw-r--r-- | hw/mem/meson.build | 10 | ||||
-rw-r--r-- | hw/mem/npcm7xx_mc.c | 84 | ||||
-rw-r--r-- | hw/mem/nvdimm.c | 282 | ||||
-rw-r--r-- | hw/mem/pc-dimm.c | 312 | ||||
-rw-r--r-- | hw/mem/sparse-mem.c | 150 | ||||
-rw-r--r-- | hw/mem/trace-events | 8 | ||||
-rw-r--r-- | hw/mem/trace.h | 1 |
10 files changed, 1858 insertions, 0 deletions
diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig new file mode 100644 index 00000000..73c5ae8a --- /dev/null +++ b/hw/mem/Kconfig @@ -0,0 +1,18 @@ +config DIMM + bool + select MEM_DEVICE + +config MEM_DEVICE + bool + +config NVDIMM + bool + select MEM_DEVICE + +config SPARSE_MEM + bool + +config CXL_MEM_DEVICE + bool + default y if CXL + select MEM_DEVICE diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c new file mode 100644 index 00000000..25559020 --- /dev/null +++ b/hw/mem/cxl_type3.c @@ -0,0 +1,647 @@ +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/error-report.h" +#include "hw/mem/memory-device.h" +#include "hw/mem/pc-dimm.h" +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "qemu/pmem.h" +#include "qemu/range.h" +#include "qemu/rcu.h" +#include "sysemu/hostmem.h" +#include "sysemu/numa.h" +#include "hw/cxl/cxl.h" +#include "hw/pci/msix.h" + +#define DWORD_BYTE 4 + +/* Default CDAT entries for a memory region */ +enum { + CT3_CDAT_DSMAS, + CT3_CDAT_DSLBIS0, + CT3_CDAT_DSLBIS1, + CT3_CDAT_DSLBIS2, + CT3_CDAT_DSLBIS3, + CT3_CDAT_DSEMTS, + CT3_CDAT_NUM_ENTRIES +}; + +static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, + int dsmad_handle, MemoryRegion *mr) +{ + g_autofree CDATDsmas *dsmas = NULL; + g_autofree CDATDslbis *dslbis0 = NULL; + g_autofree CDATDslbis *dslbis1 = NULL; + g_autofree CDATDslbis *dslbis2 = NULL; + g_autofree CDATDslbis *dslbis3 = NULL; + g_autofree CDATDsemts *dsemts = NULL; + + dsmas = g_malloc(sizeof(*dsmas)); + if (!dsmas) { + return -ENOMEM; + } + *dsmas = (CDATDsmas) { + .header = { + .type = CDAT_TYPE_DSMAS, + .length = sizeof(*dsmas), + }, + .DSMADhandle = dsmad_handle, + .flags = CDAT_DSMAS_FLAG_NV, + .DPA_base = 0, + .DPA_length = int128_get64(mr->size), + }; + + /* For now, no memory side cache, plausiblish numbers */ + dslbis0 = g_malloc(sizeof(*dslbis0)); + if (!dslbis0) { + return -ENOMEM; + } + *dslbis0 = (CDATDslbis) { + .header = { + .type = CDAT_TYPE_DSLBIS, + .length = sizeof(*dslbis0), + }, + .handle = dsmad_handle, + .flags = HMAT_LB_MEM_MEMORY, + .data_type = HMAT_LB_DATA_READ_LATENCY, + .entry_base_unit = 10000, /* 10ns base */ + .entry[0] = 15, /* 150ns */ + }; + + dslbis1 = g_malloc(sizeof(*dslbis1)); + if (!dslbis1) { + return -ENOMEM; + } + *dslbis1 = (CDATDslbis) { + .header = { + .type = CDAT_TYPE_DSLBIS, + .length = sizeof(*dslbis1), + }, + .handle = dsmad_handle, + .flags = HMAT_LB_MEM_MEMORY, + .data_type = HMAT_LB_DATA_WRITE_LATENCY, + .entry_base_unit = 10000, + .entry[0] = 25, /* 250ns */ + }; + + dslbis2 = g_malloc(sizeof(*dslbis2)); + if (!dslbis2) { + return -ENOMEM; + } + *dslbis2 = (CDATDslbis) { + .header = { + .type = CDAT_TYPE_DSLBIS, + .length = sizeof(*dslbis2), + }, + .handle = dsmad_handle, + .flags = HMAT_LB_MEM_MEMORY, + .data_type = HMAT_LB_DATA_READ_BANDWIDTH, + .entry_base_unit = 1000, /* GB/s */ + .entry[0] = 16, + }; + + dslbis3 = g_malloc(sizeof(*dslbis3)); + if (!dslbis3) { + return -ENOMEM; + } + *dslbis3 = (CDATDslbis) { + .header = { + .type = CDAT_TYPE_DSLBIS, + .length = sizeof(*dslbis3), + }, + .handle = dsmad_handle, + .flags = HMAT_LB_MEM_MEMORY, + .data_type = HMAT_LB_DATA_WRITE_BANDWIDTH, + .entry_base_unit = 1000, /* GB/s */ + .entry[0] = 16, + }; + + dsemts = g_malloc(sizeof(*dsemts)); + if (!dsemts) { + return -ENOMEM; + } + *dsemts = (CDATDsemts) { + .header = { + .type = CDAT_TYPE_DSEMTS, + .length = sizeof(*dsemts), + }, + .DSMAS_handle = dsmad_handle, + /* Reserved - the non volatile from DSMAS matters */ + .EFI_memory_type_attr = 2, + .DPA_offset = 0, + .DPA_length = int128_get64(mr->size), + }; + + /* Header always at start of structure */ + cdat_table[CT3_CDAT_DSMAS] = g_steal_pointer(&dsmas); + cdat_table[CT3_CDAT_DSLBIS0] = g_steal_pointer(&dslbis0); + cdat_table[CT3_CDAT_DSLBIS1] = g_steal_pointer(&dslbis1); + cdat_table[CT3_CDAT_DSLBIS2] = g_steal_pointer(&dslbis2); + cdat_table[CT3_CDAT_DSLBIS3] = g_steal_pointer(&dslbis3); + cdat_table[CT3_CDAT_DSEMTS] = g_steal_pointer(&dsemts); + + return 0; +} + +static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv) +{ + g_autofree CDATSubHeader **table = NULL; + MemoryRegion *nonvolatile_mr; + CXLType3Dev *ct3d = priv; + int dsmad_handle = 0; + int rc; + + if (!ct3d->hostmem) { + return 0; + } + + nonvolatile_mr = host_memory_backend_get_memory(ct3d->hostmem); + if (!nonvolatile_mr) { + return -EINVAL; + } + + table = g_malloc0(CT3_CDAT_NUM_ENTRIES * sizeof(*table)); + if (!table) { + return -ENOMEM; + } + + rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, nonvolatile_mr); + if (rc < 0) { + return rc; + } + + *cdat_table = g_steal_pointer(&table); + + return CT3_CDAT_NUM_ENTRIES; +} + +static void ct3_free_cdat_table(CDATSubHeader **cdat_table, int num, void *priv) +{ + int i; + + for (i = 0; i < num; i++) { + g_free(cdat_table[i]); + } + g_free(cdat_table); +} + +static bool cxl_doe_cdat_rsp(DOECap *doe_cap) +{ + CDATObject *cdat = &CXL_TYPE3(doe_cap->pdev)->cxl_cstate.cdat; + uint16_t ent; + void *base; + uint32_t len; + CDATReq *req = pcie_doe_get_write_mbox_ptr(doe_cap); + CDATRsp rsp; + + assert(cdat->entry_len); + + /* Discard if request length mismatched */ + if (pcie_doe_get_obj_len(req) < + DIV_ROUND_UP(sizeof(CDATReq), DWORD_BYTE)) { + return false; + } + + ent = req->entry_handle; + base = cdat->entry[ent].base; + len = cdat->entry[ent].length; + + rsp = (CDATRsp) { + .header = { + .vendor_id = CXL_VENDOR_ID, + .data_obj_type = CXL_DOE_TABLE_ACCESS, + .reserved = 0x0, + .length = DIV_ROUND_UP((sizeof(rsp) + len), DWORD_BYTE), + }, + .rsp_code = CXL_DOE_TAB_RSP, + .table_type = CXL_DOE_TAB_TYPE_CDAT, + .entry_handle = (ent < cdat->entry_len - 1) ? + ent + 1 : CXL_DOE_TAB_ENT_MAX, + }; + + memcpy(doe_cap->read_mbox, &rsp, sizeof(rsp)); + memcpy(doe_cap->read_mbox + DIV_ROUND_UP(sizeof(rsp), DWORD_BYTE), + base, len); + + doe_cap->read_mbox_len += rsp.header.length; + + return true; +} + +static uint32_t ct3d_config_read(PCIDevice *pci_dev, uint32_t addr, int size) +{ + CXLType3Dev *ct3d = CXL_TYPE3(pci_dev); + uint32_t val; + + if (pcie_doe_read_config(&ct3d->doe_cdat, addr, size, &val)) { + return val; + } + + return pci_default_read_config(pci_dev, addr, size); +} + +static void ct3d_config_write(PCIDevice *pci_dev, uint32_t addr, uint32_t val, + int size) +{ + CXLType3Dev *ct3d = CXL_TYPE3(pci_dev); + + pcie_doe_write_config(&ct3d->doe_cdat, addr, val, size); + pci_default_write_config(pci_dev, addr, val, size); +} + +/* + * Null value of all Fs suggested by IEEE RA guidelines for use of + * EU, OUI and CID + */ +#define UI64_NULL ~(0ULL) + +static void build_dvsecs(CXLType3Dev *ct3d) +{ + CXLComponentState *cxl_cstate = &ct3d->cxl_cstate; + uint8_t *dvsec; + + dvsec = (uint8_t *)&(CXLDVSECDevice){ + .cap = 0x1e, + .ctrl = 0x2, + .status2 = 0x2, + .range1_size_hi = ct3d->hostmem->size >> 32, + .range1_size_lo = (2 << 5) | (2 << 2) | 0x3 | + (ct3d->hostmem->size & 0xF0000000), + .range1_base_hi = 0, + .range1_base_lo = 0, + }; + cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE, + PCIE_CXL_DEVICE_DVSEC_LENGTH, + PCIE_CXL_DEVICE_DVSEC, + PCIE_CXL2_DEVICE_DVSEC_REVID, dvsec); + + dvsec = (uint8_t *)&(CXLDVSECRegisterLocator){ + .rsvd = 0, + .reg0_base_lo = RBI_COMPONENT_REG | CXL_COMPONENT_REG_BAR_IDX, + .reg0_base_hi = 0, + .reg1_base_lo = RBI_CXL_DEVICE_REG | CXL_DEVICE_REG_BAR_IDX, + .reg1_base_hi = 0, + }; + cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE, + REG_LOC_DVSEC_LENGTH, REG_LOC_DVSEC, + REG_LOC_DVSEC_REVID, dvsec); + dvsec = (uint8_t *)&(CXLDVSECDeviceGPF){ + .phase2_duration = 0x603, /* 3 seconds */ + .phase2_power = 0x33, /* 0x33 miliwatts */ + }; + cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE, + GPF_DEVICE_DVSEC_LENGTH, GPF_DEVICE_DVSEC, + GPF_DEVICE_DVSEC_REVID, dvsec); +} + +static void hdm_decoder_commit(CXLType3Dev *ct3d, int which) +{ + ComponentRegisters *cregs = &ct3d->cxl_cstate.crb; + uint32_t *cache_mem = cregs->cache_mem_registers; + + assert(which == 0); + + /* TODO: Sanity checks that the decoder is possible */ + ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMIT, 0); + ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, ERR, 0); + + ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); +} + +static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size) +{ + CXLComponentState *cxl_cstate = opaque; + ComponentRegisters *cregs = &cxl_cstate->crb; + CXLType3Dev *ct3d = container_of(cxl_cstate, CXLType3Dev, cxl_cstate); + uint32_t *cache_mem = cregs->cache_mem_registers; + bool should_commit = false; + int which_hdm = -1; + + assert(size == 4); + g_assert(offset < CXL2_COMPONENT_CM_REGION_SIZE); + + switch (offset) { + case A_CXL_HDM_DECODER0_CTRL: + should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT); + which_hdm = 0; + break; + default: + break; + } + + stl_le_p((uint8_t *)cache_mem + offset, value); + if (should_commit) { + hdm_decoder_commit(ct3d, which_hdm); + } +} + +static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) +{ + DeviceState *ds = DEVICE(ct3d); + MemoryRegion *mr; + char *name; + + if (!ct3d->hostmem) { + error_setg(errp, "memdev property must be set"); + return false; + } + + mr = host_memory_backend_get_memory(ct3d->hostmem); + if (!mr) { + error_setg(errp, "memdev property must be set"); + return false; + } + memory_region_set_nonvolatile(mr, true); + memory_region_set_enabled(mr, true); + host_memory_backend_set_mapped(ct3d->hostmem, true); + + if (ds->id) { + name = g_strdup_printf("cxl-type3-dpa-space:%s", ds->id); + } else { + name = g_strdup("cxl-type3-dpa-space"); + } + address_space_init(&ct3d->hostmem_as, mr, name); + g_free(name); + + ct3d->cxl_dstate.pmem_size = ct3d->hostmem->size; + + if (!ct3d->lsa) { + error_setg(errp, "lsa property must be set"); + return false; + } + + return true; +} + +static DOEProtocol doe_cdat_prot[] = { + { CXL_VENDOR_ID, CXL_DOE_TABLE_ACCESS, cxl_doe_cdat_rsp }, + { } +}; + +static void ct3_realize(PCIDevice *pci_dev, Error **errp) +{ + CXLType3Dev *ct3d = CXL_TYPE3(pci_dev); + CXLComponentState *cxl_cstate = &ct3d->cxl_cstate; + ComponentRegisters *regs = &cxl_cstate->crb; + MemoryRegion *mr = ®s->component_registers; + uint8_t *pci_conf = pci_dev->config; + unsigned short msix_num = 1; + int i; + + if (!cxl_setup_memory(ct3d, errp)) { + return; + } + + pci_config_set_prog_interface(pci_conf, 0x10); + pci_config_set_class(pci_conf, PCI_CLASS_MEMORY_CXL); + + pcie_endpoint_cap_init(pci_dev, 0x80); + if (ct3d->sn != UI64_NULL) { + pcie_dev_ser_num_init(pci_dev, 0x100, ct3d->sn); + cxl_cstate->dvsec_offset = 0x100 + 0x0c; + } else { + cxl_cstate->dvsec_offset = 0x100; + } + + ct3d->cxl_cstate.pdev = pci_dev; + build_dvsecs(ct3d); + + regs->special_ops = g_new0(MemoryRegionOps, 1); + regs->special_ops->write = ct3d_reg_write; + + cxl_component_register_block_init(OBJECT(pci_dev), cxl_cstate, + TYPE_CXL_TYPE3); + + pci_register_bar( + pci_dev, CXL_COMPONENT_REG_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, mr); + + cxl_device_register_block_init(OBJECT(pci_dev), &ct3d->cxl_dstate); + pci_register_bar(pci_dev, CXL_DEVICE_REG_BAR_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, + &ct3d->cxl_dstate.device_registers); + + /* MSI(-X) Initailization */ + msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL); + for (i = 0; i < msix_num; i++) { + msix_vector_use(pci_dev, i); + } + + /* DOE Initailization */ + pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true, 0); + + cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table; + cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table; + cxl_cstate->cdat.private = ct3d; + cxl_doe_cdat_init(cxl_cstate, errp); +} + +static void ct3_exit(PCIDevice *pci_dev) +{ + CXLType3Dev *ct3d = CXL_TYPE3(pci_dev); + CXLComponentState *cxl_cstate = &ct3d->cxl_cstate; + ComponentRegisters *regs = &cxl_cstate->crb; + + cxl_doe_cdat_release(cxl_cstate); + g_free(regs->special_ops); + address_space_destroy(&ct3d->hostmem_as); +} + +/* TODO: Support multiple HDM decoders and DPA skip */ +static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa) +{ + uint32_t *cache_mem = ct3d->cxl_cstate.crb.cache_mem_registers; + uint64_t decoder_base, decoder_size, hpa_offset; + uint32_t hdm0_ctrl; + int ig, iw; + + decoder_base = (((uint64_t)cache_mem[R_CXL_HDM_DECODER0_BASE_HI] << 32) | + cache_mem[R_CXL_HDM_DECODER0_BASE_LO]); + if ((uint64_t)host_addr < decoder_base) { + return false; + } + + hpa_offset = (uint64_t)host_addr - decoder_base; + + decoder_size = ((uint64_t)cache_mem[R_CXL_HDM_DECODER0_SIZE_HI] << 32) | + cache_mem[R_CXL_HDM_DECODER0_SIZE_LO]; + if (hpa_offset >= decoder_size) { + return false; + } + + hdm0_ctrl = cache_mem[R_CXL_HDM_DECODER0_CTRL]; + iw = FIELD_EX32(hdm0_ctrl, CXL_HDM_DECODER0_CTRL, IW); + ig = FIELD_EX32(hdm0_ctrl, CXL_HDM_DECODER0_CTRL, IG); + + *dpa = (MAKE_64BIT_MASK(0, 8 + ig) & hpa_offset) | + ((MAKE_64BIT_MASK(8 + ig + iw, 64 - 8 - ig - iw) & hpa_offset) >> iw); + + return true; +} + +MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, + unsigned size, MemTxAttrs attrs) +{ + CXLType3Dev *ct3d = CXL_TYPE3(d); + uint64_t dpa_offset; + MemoryRegion *mr; + + /* TODO support volatile region */ + mr = host_memory_backend_get_memory(ct3d->hostmem); + if (!mr) { + return MEMTX_ERROR; + } + + if (!cxl_type3_dpa(ct3d, host_addr, &dpa_offset)) { + return MEMTX_ERROR; + } + + if (dpa_offset > int128_get64(mr->size)) { + return MEMTX_ERROR; + } + + return address_space_read(&ct3d->hostmem_as, dpa_offset, attrs, data, size); +} + +MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data, + unsigned size, MemTxAttrs attrs) +{ + CXLType3Dev *ct3d = CXL_TYPE3(d); + uint64_t dpa_offset; + MemoryRegion *mr; + + mr = host_memory_backend_get_memory(ct3d->hostmem); + if (!mr) { + return MEMTX_OK; + } + + if (!cxl_type3_dpa(ct3d, host_addr, &dpa_offset)) { + return MEMTX_OK; + } + + if (dpa_offset > int128_get64(mr->size)) { + return MEMTX_OK; + } + return address_space_write(&ct3d->hostmem_as, dpa_offset, attrs, + &data, size); +} + +static void ct3d_reset(DeviceState *dev) +{ + CXLType3Dev *ct3d = CXL_TYPE3(dev); + uint32_t *reg_state = ct3d->cxl_cstate.crb.cache_mem_registers; + uint32_t *write_msk = ct3d->cxl_cstate.crb.cache_mem_regs_write_mask; + + cxl_component_register_init_common(reg_state, write_msk, CXL2_TYPE3_DEVICE); + cxl_device_register_init_common(&ct3d->cxl_dstate); +} + +static Property ct3_props[] = { + DEFINE_PROP_LINK("memdev", CXLType3Dev, hostmem, TYPE_MEMORY_BACKEND, + HostMemoryBackend *), + DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND, + HostMemoryBackend *), + DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL), + DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename), + DEFINE_PROP_END_OF_LIST(), +}; + +static uint64_t get_lsa_size(CXLType3Dev *ct3d) +{ + MemoryRegion *mr; + + mr = host_memory_backend_get_memory(ct3d->lsa); + return memory_region_size(mr); +} + +static void validate_lsa_access(MemoryRegion *mr, uint64_t size, + uint64_t offset) +{ + assert(offset + size <= memory_region_size(mr)); + assert(offset + size > offset); +} + +static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size, + uint64_t offset) +{ + MemoryRegion *mr; + void *lsa; + + mr = host_memory_backend_get_memory(ct3d->lsa); + validate_lsa_access(mr, size, offset); + + lsa = memory_region_get_ram_ptr(mr) + offset; + memcpy(buf, lsa, size); + + return size; +} + +static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size, + uint64_t offset) +{ + MemoryRegion *mr; + void *lsa; + + mr = host_memory_backend_get_memory(ct3d->lsa); + validate_lsa_access(mr, size, offset); + + lsa = memory_region_get_ram_ptr(mr) + offset; + memcpy(lsa, buf, size); + memory_region_set_dirty(mr, offset, size); + + /* + * Just like the PMEM, if the guest is not allowed to exit gracefully, label + * updates will get lost. + */ +} + +static void ct3_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc); + CXLType3Class *cvc = CXL_TYPE3_CLASS(oc); + + pc->realize = ct3_realize; + pc->exit = ct3_exit; + pc->class_id = PCI_CLASS_STORAGE_EXPRESS; + pc->vendor_id = PCI_VENDOR_ID_INTEL; + pc->device_id = 0xd93; /* LVF for now */ + pc->revision = 1; + + pc->config_write = ct3d_config_write; + pc->config_read = ct3d_config_read; + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->desc = "CXL PMEM Device (Type 3)"; + dc->reset = ct3d_reset; + device_class_set_props(dc, ct3_props); + + cvc->get_lsa_size = get_lsa_size; + cvc->get_lsa = get_lsa; + cvc->set_lsa = set_lsa; +} + +static const TypeInfo ct3d_info = { + .name = TYPE_CXL_TYPE3, + .parent = TYPE_PCI_DEVICE, + .class_size = sizeof(struct CXLType3Class), + .class_init = ct3_class_init, + .instance_size = sizeof(CXLType3Dev), + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CXL_DEVICE }, + { INTERFACE_PCIE_DEVICE }, + {} + }, +}; + +static void ct3d_registers(void) +{ + type_register_static(&ct3d_info); +} + +type_init(ct3d_registers); diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c new file mode 100644 index 00000000..d9f83017 --- /dev/null +++ b/hw/mem/memory-device.c @@ -0,0 +1,346 @@ +/* + * Memory Device Interface + * + * Copyright ProfitBricks GmbH 2012 + * Copyright (C) 2014 Red Hat Inc + * Copyright (c) 2018 Red Hat Inc + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/mem/memory-device.h" +#include "qapi/error.h" +#include "hw/boards.h" +#include "qemu/range.h" +#include "hw/virtio/vhost.h" +#include "sysemu/kvm.h" +#include "trace.h" + +static gint memory_device_addr_sort(gconstpointer a, gconstpointer b) +{ + const MemoryDeviceState *md_a = MEMORY_DEVICE(a); + const MemoryDeviceState *md_b = MEMORY_DEVICE(b); + const MemoryDeviceClass *mdc_a = MEMORY_DEVICE_GET_CLASS(a); + const MemoryDeviceClass *mdc_b = MEMORY_DEVICE_GET_CLASS(b); + const uint64_t addr_a = mdc_a->get_addr(md_a); + const uint64_t addr_b = mdc_b->get_addr(md_b); + + if (addr_a > addr_b) { + return 1; + } else if (addr_a < addr_b) { + return -1; + } + return 0; +} + +static int memory_device_build_list(Object *obj, void *opaque) +{ + GSList **list = opaque; + + if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE)) { + DeviceState *dev = DEVICE(obj); + if (dev->realized) { /* only realized memory devices matter */ + *list = g_slist_insert_sorted(*list, dev, memory_device_addr_sort); + } + } + + object_child_foreach(obj, memory_device_build_list, opaque); + return 0; +} + +static int memory_device_used_region_size(Object *obj, void *opaque) +{ + uint64_t *size = opaque; + + if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE)) { + const DeviceState *dev = DEVICE(obj); + const MemoryDeviceState *md = MEMORY_DEVICE(obj); + + if (dev->realized) { + *size += memory_device_get_region_size(md, &error_abort); + } + } + + object_child_foreach(obj, memory_device_used_region_size, opaque); + return 0; +} + +static void memory_device_check_addable(MachineState *ms, uint64_t size, + Error **errp) +{ + uint64_t used_region_size = 0; + + /* we will need a new memory slot for kvm and vhost */ + if (kvm_enabled() && !kvm_has_free_slot(ms)) { + error_setg(errp, "hypervisor has no free memory slots left"); + return; + } + if (!vhost_has_free_slot()) { + error_setg(errp, "a used vhost backend has no free memory slots left"); + return; + } + + /* will we exceed the total amount of memory specified */ + memory_device_used_region_size(OBJECT(ms), &used_region_size); + if (used_region_size + size < used_region_size || + used_region_size + size > ms->maxram_size - ms->ram_size) { + error_setg(errp, "not enough space, currently 0x%" PRIx64 + " in use of total space for memory devices 0x" RAM_ADDR_FMT, + used_region_size, ms->maxram_size - ms->ram_size); + return; + } + +} + +static uint64_t memory_device_get_free_addr(MachineState *ms, + const uint64_t *hint, + uint64_t align, uint64_t size, + Error **errp) +{ + Error *err = NULL; + GSList *list = NULL, *item; + Range as, new = range_empty; + + if (!ms->device_memory) { + error_setg(errp, "memory devices (e.g. for memory hotplug) are not " + "supported by the machine"); + return 0; + } + + if (!memory_region_size(&ms->device_memory->mr)) { + error_setg(errp, "memory devices (e.g. for memory hotplug) are not " + "enabled, please specify the maxmem option"); + return 0; + } + range_init_nofail(&as, ms->device_memory->base, + memory_region_size(&ms->device_memory->mr)); + + /* start of address space indicates the maximum alignment we expect */ + if (!QEMU_IS_ALIGNED(range_lob(&as), align)) { + warn_report("the alignment (0x%" PRIx64 ") exceeds the expected" + " maximum alignment, memory will get fragmented and not" + " all 'maxmem' might be usable for memory devices.", + align); + } + + memory_device_check_addable(ms, size, &err); + if (err) { + error_propagate(errp, err); + return 0; + } + + if (hint && !QEMU_IS_ALIGNED(*hint, align)) { + error_setg(errp, "address must be aligned to 0x%" PRIx64 " bytes", + align); + return 0; + } + + if (!QEMU_IS_ALIGNED(size, align)) { + error_setg(errp, "backend memory size must be multiple of 0x%" + PRIx64, align); + return 0; + } + + if (hint) { + if (range_init(&new, *hint, size) || !range_contains_range(&as, &new)) { + error_setg(errp, "can't add memory device [0x%" PRIx64 ":0x%" PRIx64 + "], usable range for memory devices [0x%" PRIx64 ":0x%" + PRIx64 "]", *hint, size, range_lob(&as), + range_size(&as)); + return 0; + } + } else { + if (range_init(&new, QEMU_ALIGN_UP(range_lob(&as), align), size)) { + error_setg(errp, "can't add memory device, device too big"); + return 0; + } + } + + /* find address range that will fit new memory device */ + object_child_foreach(OBJECT(ms), memory_device_build_list, &list); + for (item = list; item; item = g_slist_next(item)) { + const MemoryDeviceState *md = item->data; + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(OBJECT(md)); + uint64_t next_addr; + Range tmp; + + range_init_nofail(&tmp, mdc->get_addr(md), + memory_device_get_region_size(md, &error_abort)); + + if (range_overlaps_range(&tmp, &new)) { + if (hint) { + const DeviceState *d = DEVICE(md); + error_setg(errp, "address range conflicts with memory device" + " id='%s'", d->id ? d->id : "(unnamed)"); + goto out; + } + + next_addr = QEMU_ALIGN_UP(range_upb(&tmp) + 1, align); + if (!next_addr || range_init(&new, next_addr, range_size(&new))) { + range_make_empty(&new); + break; + } + } else if (range_lob(&tmp) > range_upb(&new)) { + break; + } + } + + if (!range_contains_range(&as, &new)) { + error_setg(errp, "could not find position in guest address space for " + "memory device - memory fragmented due to alignments"); + } +out: + g_slist_free(list); + return range_lob(&new); +} + +MemoryDeviceInfoList *qmp_memory_device_list(void) +{ + GSList *devices = NULL, *item; + MemoryDeviceInfoList *list = NULL, **tail = &list; + + object_child_foreach(qdev_get_machine(), memory_device_build_list, + &devices); + + for (item = devices; item; item = g_slist_next(item)) { + const MemoryDeviceState *md = MEMORY_DEVICE(item->data); + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(item->data); + MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1); + + mdc->fill_device_info(md, info); + + QAPI_LIST_APPEND(tail, info); + } + + g_slist_free(devices); + + return list; +} + +static int memory_device_plugged_size(Object *obj, void *opaque) +{ + uint64_t *size = opaque; + + if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE)) { + const DeviceState *dev = DEVICE(obj); + const MemoryDeviceState *md = MEMORY_DEVICE(obj); + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(obj); + + if (dev->realized) { + *size += mdc->get_plugged_size(md, &error_abort); + } + } + + object_child_foreach(obj, memory_device_plugged_size, opaque); + return 0; +} + +uint64_t get_plugged_memory_size(void) +{ + uint64_t size = 0; + + memory_device_plugged_size(qdev_get_machine(), &size); + + return size; +} + +void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, + const uint64_t *legacy_align, Error **errp) +{ + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); + Error *local_err = NULL; + uint64_t addr, align = 0; + MemoryRegion *mr; + + mr = mdc->get_memory_region(md, &local_err); + if (local_err) { + goto out; + } + + if (legacy_align) { + align = *legacy_align; + } else { + if (mdc->get_min_alignment) { + align = mdc->get_min_alignment(md); + } + align = MAX(align, memory_region_get_alignment(mr)); + } + addr = mdc->get_addr(md); + addr = memory_device_get_free_addr(ms, !addr ? NULL : &addr, align, + memory_region_size(mr), &local_err); + if (local_err) { + goto out; + } + mdc->set_addr(md, addr, &local_err); + if (!local_err) { + trace_memory_device_pre_plug(DEVICE(md)->id ? DEVICE(md)->id : "", + addr); + } +out: + error_propagate(errp, local_err); +} + +void memory_device_plug(MemoryDeviceState *md, MachineState *ms) +{ + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); + const uint64_t addr = mdc->get_addr(md); + MemoryRegion *mr; + + /* + * We expect that a previous call to memory_device_pre_plug() succeeded, so + * it can't fail at this point. + */ + mr = mdc->get_memory_region(md, &error_abort); + g_assert(ms->device_memory); + + memory_region_add_subregion(&ms->device_memory->mr, + addr - ms->device_memory->base, mr); + trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr); +} + +void memory_device_unplug(MemoryDeviceState *md, MachineState *ms) +{ + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); + MemoryRegion *mr; + + /* + * We expect that a previous call to memory_device_pre_plug() succeeded, so + * it can't fail at this point. + */ + mr = mdc->get_memory_region(md, &error_abort); + g_assert(ms->device_memory); + + memory_region_del_subregion(&ms->device_memory->mr, mr); + trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "", + mdc->get_addr(md)); +} + +uint64_t memory_device_get_region_size(const MemoryDeviceState *md, + Error **errp) +{ + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); + MemoryRegion *mr; + + /* dropping const here is fine as we don't touch the memory region */ + mr = mdc->get_memory_region((MemoryDeviceState *)md, errp); + if (!mr) { + return 0; + } + + return memory_region_size(mr); +} + +static const TypeInfo memory_device_info = { + .name = TYPE_MEMORY_DEVICE, + .parent = TYPE_INTERFACE, + .class_size = sizeof(MemoryDeviceClass), +}; + +static void memory_device_register_types(void) +{ + type_register_static(&memory_device_info); +} + +type_init(memory_device_register_types) diff --git a/hw/mem/meson.build b/hw/mem/meson.build new file mode 100644 index 00000000..609b2b36 --- /dev/null +++ b/hw/mem/meson.build @@ -0,0 +1,10 @@ +mem_ss = ss.source_set() +mem_ss.add(files('memory-device.c')) +mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c')) +mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c')) +mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c')) +mem_ss.add(when: 'CONFIG_CXL_MEM_DEVICE', if_true: files('cxl_type3.c')) + +softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss) + +softmmu_ss.add(when: 'CONFIG_SPARSE_MEM', if_true: files('sparse-mem.c')) diff --git a/hw/mem/npcm7xx_mc.c b/hw/mem/npcm7xx_mc.c new file mode 100644 index 00000000..abc5af56 --- /dev/null +++ b/hw/mem/npcm7xx_mc.c @@ -0,0 +1,84 @@ +/* + * Nuvoton NPCM7xx Memory Controller stub + * + * Copyright 2020 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include "qemu/osdep.h" + +#include "hw/mem/npcm7xx_mc.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "qemu/units.h" + +#define NPCM7XX_MC_REGS_SIZE (4 * KiB) + +static uint64_t npcm7xx_mc_read(void *opaque, hwaddr addr, unsigned int size) +{ + /* + * If bits 8..11 @ offset 0 are not zero, the boot block thinks the memory + * controller has already been initialized and will skip DDR training. + */ + if (addr == 0) { + return 0x100; + } + + qemu_log_mask(LOG_UNIMP, "%s: mostly unimplemented\n", __func__); + + return 0; +} + +static void npcm7xx_mc_write(void *opaque, hwaddr addr, uint64_t v, + unsigned int size) +{ + qemu_log_mask(LOG_UNIMP, "%s: mostly unimplemented\n", __func__); +} + +static const MemoryRegionOps npcm7xx_mc_ops = { + .read = npcm7xx_mc_read, + .write = npcm7xx_mc_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + .unaligned = false, + }, +}; + +static void npcm7xx_mc_realize(DeviceState *dev, Error **errp) +{ + NPCM7xxMCState *s = NPCM7XX_MC(dev); + + memory_region_init_io(&s->mmio, OBJECT(s), &npcm7xx_mc_ops, s, "regs", + NPCM7XX_MC_REGS_SIZE); + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio); +} + +static void npcm7xx_mc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "NPCM7xx Memory Controller stub"; + dc->realize = npcm7xx_mc_realize; +} + +static const TypeInfo npcm7xx_mc_types[] = { + { + .name = TYPE_NPCM7XX_MC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(NPCM7xxMCState), + .class_init = npcm7xx_mc_class_init, + }, +}; +DEFINE_TYPES(npcm7xx_mc_types); diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c new file mode 100644 index 00000000..31080c22 --- /dev/null +++ b/hw/mem/nvdimm.c @@ -0,0 +1,282 @@ +/* + * Non-Volatile Dual In-line Memory Module Virtualization Implementation + * + * Copyright(C) 2015 Intel Corporation. + * + * Author: + * Xiao Guangrong <guangrong.xiao@linux.intel.com> + * + * Currently, it only supports PMEM Virtualization. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +#include "qemu/osdep.h" +#include "qemu/module.h" +#include "qemu/pmem.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "hw/mem/nvdimm.h" +#include "hw/qdev-properties.h" +#include "hw/mem/memory-device.h" +#include "sysemu/hostmem.h" + +static void nvdimm_get_label_size(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(obj); + uint64_t value = nvdimm->label_size; + + visit_type_size(v, name, &value, errp); +} + +static void nvdimm_set_label_size(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(obj); + uint64_t value; + + if (nvdimm->nvdimm_mr) { + error_setg(errp, "cannot change property value"); + return; + } + + if (!visit_type_size(v, name, &value, errp)) { + return; + } + if (value < MIN_NAMESPACE_LABEL_SIZE) { + error_setg(errp, "Property '%s.%s' (0x%" PRIx64 ") is required" + " at least 0x%lx", object_get_typename(obj), name, value, + MIN_NAMESPACE_LABEL_SIZE); + return; + } + + nvdimm->label_size = value; +} + +static void nvdimm_get_uuid(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(obj); + char *value = NULL; + + value = qemu_uuid_unparse_strdup(&nvdimm->uuid); + + visit_type_str(v, name, &value, errp); + g_free(value); +} + + +static void nvdimm_set_uuid(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(obj); + char *value; + + if (!visit_type_str(v, name, &value, errp)) { + return; + } + + if (qemu_uuid_parse(value, &nvdimm->uuid) != 0) { + error_setg(errp, "Property '%s.%s' has invalid value", + object_get_typename(obj), name); + } + + g_free(value); +} + + +static void nvdimm_init(Object *obj) +{ + object_property_add(obj, NVDIMM_LABEL_SIZE_PROP, "int", + nvdimm_get_label_size, nvdimm_set_label_size, NULL, + NULL); + + object_property_add(obj, NVDIMM_UUID_PROP, "QemuUUID", nvdimm_get_uuid, + nvdimm_set_uuid, NULL, NULL); +} + +static void nvdimm_finalize(Object *obj) +{ + NVDIMMDevice *nvdimm = NVDIMM(obj); + + g_free(nvdimm->nvdimm_mr); +} + +static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp) +{ + PCDIMMDevice *dimm = PC_DIMM(nvdimm); + uint64_t align, pmem_size, size; + MemoryRegion *mr; + + g_assert(!nvdimm->nvdimm_mr); + + if (!dimm->hostmem) { + error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property must be set"); + return; + } + + mr = host_memory_backend_get_memory(dimm->hostmem); + align = memory_region_get_alignment(mr); + size = memory_region_size(mr); + + pmem_size = size - nvdimm->label_size; + nvdimm->label_data = memory_region_get_ram_ptr(mr) + pmem_size; + pmem_size = QEMU_ALIGN_DOWN(pmem_size, align); + + if (size <= nvdimm->label_size || !pmem_size) { + HostMemoryBackend *hostmem = dimm->hostmem; + + error_setg(errp, "the size of memdev %s (0x%" PRIx64 ") is too " + "small to contain nvdimm label (0x%" PRIx64 ") and " + "aligned PMEM (0x%" PRIx64 ")", + object_get_canonical_path_component(OBJECT(hostmem)), + memory_region_size(mr), nvdimm->label_size, align); + return; + } + + if (!nvdimm->unarmed && memory_region_is_rom(mr)) { + HostMemoryBackend *hostmem = dimm->hostmem; + + error_setg(errp, "'unarmed' property must be 'on' since memdev %s " + "is read-only", + object_get_canonical_path_component(OBJECT(hostmem))); + return; + } + + nvdimm->nvdimm_mr = g_new(MemoryRegion, 1); + memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm), + "nvdimm-memory", mr, 0, pmem_size); + memory_region_set_nonvolatile(nvdimm->nvdimm_mr, true); + nvdimm->nvdimm_mr->align = align; +} + +static MemoryRegion *nvdimm_md_get_memory_region(MemoryDeviceState *md, + Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(md); + Error *local_err = NULL; + + if (!nvdimm->nvdimm_mr) { + nvdimm_prepare_memory_region(nvdimm, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return NULL; + } + } + return nvdimm->nvdimm_mr; +} + +static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(dimm); + NVDIMMClass *ndc = NVDIMM_GET_CLASS(nvdimm); + + if (!nvdimm->nvdimm_mr) { + nvdimm_prepare_memory_region(nvdimm, errp); + } + + if (ndc->realize) { + ndc->realize(nvdimm, errp); + } +} + +static void nvdimm_unrealize(PCDIMMDevice *dimm) +{ + NVDIMMDevice *nvdimm = NVDIMM(dimm); + NVDIMMClass *ndc = NVDIMM_GET_CLASS(nvdimm); + + if (ndc->unrealize) { + ndc->unrealize(nvdimm); + } +} + +/* + * the caller should check the input parameters before calling + * label read/write functions. + */ +static void nvdimm_validate_rw_label_data(NVDIMMDevice *nvdimm, uint64_t size, + uint64_t offset) +{ + assert((nvdimm->label_size >= size + offset) && (offset + size > offset)); +} + +static void nvdimm_read_label_data(NVDIMMDevice *nvdimm, void *buf, + uint64_t size, uint64_t offset) +{ + nvdimm_validate_rw_label_data(nvdimm, size, offset); + + memcpy(buf, nvdimm->label_data + offset, size); +} + +static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf, + uint64_t size, uint64_t offset) +{ + MemoryRegion *mr; + PCDIMMDevice *dimm = PC_DIMM(nvdimm); + bool is_pmem = object_property_get_bool(OBJECT(dimm->hostmem), + "pmem", NULL); + uint64_t backend_offset; + + nvdimm_validate_rw_label_data(nvdimm, size, offset); + + if (!is_pmem) { + memcpy(nvdimm->label_data + offset, buf, size); + } else { + pmem_memcpy_persist(nvdimm->label_data + offset, buf, size); + } + + mr = host_memory_backend_get_memory(dimm->hostmem); + backend_offset = memory_region_size(mr) - nvdimm->label_size + offset; + memory_region_set_dirty(mr, backend_offset, size); +} + +static Property nvdimm_properties[] = { + DEFINE_PROP_BOOL(NVDIMM_UNARMED_PROP, NVDIMMDevice, unarmed, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void nvdimm_class_init(ObjectClass *oc, void *data) +{ + PCDIMMDeviceClass *ddc = PC_DIMM_CLASS(oc); + MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(oc); + NVDIMMClass *nvc = NVDIMM_CLASS(oc); + DeviceClass *dc = DEVICE_CLASS(oc); + + ddc->realize = nvdimm_realize; + ddc->unrealize = nvdimm_unrealize; + mdc->get_memory_region = nvdimm_md_get_memory_region; + device_class_set_props(dc, nvdimm_properties); + + nvc->read_label_data = nvdimm_read_label_data; + nvc->write_label_data = nvdimm_write_label_data; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static const TypeInfo nvdimm_info = { + .name = TYPE_NVDIMM, + .parent = TYPE_PC_DIMM, + .class_size = sizeof(NVDIMMClass), + .class_init = nvdimm_class_init, + .instance_size = sizeof(NVDIMMDevice), + .instance_init = nvdimm_init, + .instance_finalize = nvdimm_finalize, +}; + +static void nvdimm_register_types(void) +{ + type_register_static(&nvdimm_info); +} + +type_init(nvdimm_register_types) diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c new file mode 100644 index 00000000..f27e1a11 --- /dev/null +++ b/hw/mem/pc-dimm.c @@ -0,0 +1,312 @@ +/* + * Dimm device for Memory Hotplug + * + * Copyright ProfitBricks GmbH 2012 + * Copyright (C) 2014 Red Hat Inc + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +#include "qemu/osdep.h" +#include "hw/boards.h" +#include "hw/mem/pc-dimm.h" +#include "hw/qdev-properties.h" +#include "migration/vmstate.h" +#include "hw/mem/nvdimm.h" +#include "hw/mem/memory-device.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "qemu/module.h" +#include "sysemu/hostmem.h" +#include "sysemu/numa.h" +#include "trace.h" + +static int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp); + +static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm, Error **errp) +{ + if (!dimm->hostmem) { + error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property must be set"); + return NULL; + } + + return host_memory_backend_get_memory(dimm->hostmem); +} + +void pc_dimm_pre_plug(PCDIMMDevice *dimm, MachineState *machine, + const uint64_t *legacy_align, Error **errp) +{ + Error *local_err = NULL; + int slot; + + slot = object_property_get_int(OBJECT(dimm), PC_DIMM_SLOT_PROP, + &error_abort); + if ((slot < 0 || slot >= machine->ram_slots) && + slot != PC_DIMM_UNASSIGNED_SLOT) { + error_setg(errp, + "invalid slot number %d, valid range is [0-%" PRIu64 "]", + slot, machine->ram_slots - 1); + return; + } + + slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot, + machine->ram_slots, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + object_property_set_int(OBJECT(dimm), PC_DIMM_SLOT_PROP, slot, + &error_abort); + trace_mhp_pc_dimm_assigned_slot(slot); + + memory_device_pre_plug(MEMORY_DEVICE(dimm), machine, legacy_align, + errp); +} + +void pc_dimm_plug(PCDIMMDevice *dimm, MachineState *machine) +{ + MemoryRegion *vmstate_mr = pc_dimm_get_memory_region(dimm, + &error_abort); + + memory_device_plug(MEMORY_DEVICE(dimm), machine); + vmstate_register_ram(vmstate_mr, DEVICE(dimm)); +} + +void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine) +{ + MemoryRegion *vmstate_mr = pc_dimm_get_memory_region(dimm, + &error_abort); + + memory_device_unplug(MEMORY_DEVICE(dimm), machine); + vmstate_unregister_ram(vmstate_mr, DEVICE(dimm)); +} + +static int pc_dimm_slot2bitmap(Object *obj, void *opaque) +{ + unsigned long *bitmap = opaque; + + if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { + DeviceState *dev = DEVICE(obj); + if (dev->realized) { /* count only realized DIMMs */ + PCDIMMDevice *d = PC_DIMM(obj); + set_bit(d->slot, bitmap); + } + } + + object_child_foreach(obj, pc_dimm_slot2bitmap, opaque); + return 0; +} + +static int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp) +{ + unsigned long *bitmap; + int slot = 0; + + if (max_slots <= 0) { + error_setg(errp, "no slots where allocated, please specify " + "the 'slots' option"); + return slot; + } + + bitmap = bitmap_new(max_slots); + object_child_foreach(qdev_get_machine(), pc_dimm_slot2bitmap, bitmap); + + /* check if requested slot is not occupied */ + if (hint) { + if (*hint >= max_slots) { + error_setg(errp, "invalid slot# %d, should be less than %d", + *hint, max_slots); + } else if (!test_bit(*hint, bitmap)) { + slot = *hint; + } else { + error_setg(errp, "slot %d is busy", *hint); + } + goto out; + } + + /* search for free slot */ + slot = find_first_zero_bit(bitmap, max_slots); + if (slot == max_slots) { + error_setg(errp, "no free slots available"); + } +out: + g_free(bitmap); + return slot; +} + +static Property pc_dimm_properties[] = { + DEFINE_PROP_UINT64(PC_DIMM_ADDR_PROP, PCDIMMDevice, addr, 0), + DEFINE_PROP_UINT32(PC_DIMM_NODE_PROP, PCDIMMDevice, node, 0), + DEFINE_PROP_INT32(PC_DIMM_SLOT_PROP, PCDIMMDevice, slot, + PC_DIMM_UNASSIGNED_SLOT), + DEFINE_PROP_LINK(PC_DIMM_MEMDEV_PROP, PCDIMMDevice, hostmem, + TYPE_MEMORY_BACKEND, HostMemoryBackend *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pc_dimm_get_size(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + Error *local_err = NULL; + uint64_t value; + + value = memory_device_get_region_size(MEMORY_DEVICE(obj), &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + visit_type_uint64(v, name, &value, errp); +} + +static void pc_dimm_init(Object *obj) +{ + object_property_add(obj, PC_DIMM_SIZE_PROP, "uint64", pc_dimm_get_size, + NULL, NULL, NULL); +} + +static void pc_dimm_realize(DeviceState *dev, Error **errp) +{ + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MachineState *ms = MACHINE(qdev_get_machine()); + + if (ms->numa_state) { + int nb_numa_nodes = ms->numa_state->num_nodes; + + if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) || + (!nb_numa_nodes && dimm->node)) { + error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %" + PRIu32 "' which exceeds the number of numa nodes: %d", + dimm->node, nb_numa_nodes ? nb_numa_nodes : 1); + return; + } + } else if (dimm->node > 0) { + error_setg(errp, "machine doesn't support NUMA"); + return; + } + + if (!dimm->hostmem) { + error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set"); + return; + } else if (host_memory_backend_is_mapped(dimm->hostmem)) { + error_setg(errp, "can't use already busy memdev: %s", + object_get_canonical_path_component(OBJECT(dimm->hostmem))); + return; + } + + if (ddc->realize) { + ddc->realize(dimm, errp); + } + + host_memory_backend_set_mapped(dimm->hostmem, true); +} + +static void pc_dimm_unrealize(DeviceState *dev) +{ + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + + if (ddc->unrealize) { + ddc->unrealize(dimm); + } + + host_memory_backend_set_mapped(dimm->hostmem, false); +} + +static uint64_t pc_dimm_md_get_addr(const MemoryDeviceState *md) +{ + return object_property_get_uint(OBJECT(md), PC_DIMM_ADDR_PROP, + &error_abort); +} + +static void pc_dimm_md_set_addr(MemoryDeviceState *md, uint64_t addr, + Error **errp) +{ + object_property_set_uint(OBJECT(md), PC_DIMM_ADDR_PROP, addr, errp); +} + +static MemoryRegion *pc_dimm_md_get_memory_region(MemoryDeviceState *md, + Error **errp) +{ + return pc_dimm_get_memory_region(PC_DIMM(md), errp); +} + +static void pc_dimm_md_fill_device_info(const MemoryDeviceState *md, + MemoryDeviceInfo *info) +{ + PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1); + const DeviceClass *dc = DEVICE_GET_CLASS(md); + const PCDIMMDevice *dimm = PC_DIMM(md); + const DeviceState *dev = DEVICE(md); + + if (dev->id) { + di->has_id = true; + di->id = g_strdup(dev->id); + } + di->hotplugged = dev->hotplugged; + di->hotpluggable = dc->hotpluggable; + di->addr = dimm->addr; + di->slot = dimm->slot; + di->node = dimm->node; + di->size = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP, + NULL); + di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem)); + + if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) { + info->u.nvdimm.data = di; + info->type = MEMORY_DEVICE_INFO_KIND_NVDIMM; + } else { + info->u.dimm.data = di; + info->type = MEMORY_DEVICE_INFO_KIND_DIMM; + } +} + +static void pc_dimm_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(oc); + + dc->realize = pc_dimm_realize; + dc->unrealize = pc_dimm_unrealize; + device_class_set_props(dc, pc_dimm_properties); + dc->desc = "DIMM memory module"; + + mdc->get_addr = pc_dimm_md_get_addr; + mdc->set_addr = pc_dimm_md_set_addr; + /* for a dimm plugged_size == region_size */ + mdc->get_plugged_size = memory_device_get_region_size; + mdc->get_memory_region = pc_dimm_md_get_memory_region; + mdc->fill_device_info = pc_dimm_md_fill_device_info; +} + +static const TypeInfo pc_dimm_info = { + .name = TYPE_PC_DIMM, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PCDIMMDevice), + .instance_init = pc_dimm_init, + .class_init = pc_dimm_class_init, + .class_size = sizeof(PCDIMMDeviceClass), + .interfaces = (InterfaceInfo[]) { + { TYPE_MEMORY_DEVICE }, + { } + }, +}; + +static void pc_dimm_register_types(void) +{ + type_register_static(&pc_dimm_info); +} + +type_init(pc_dimm_register_types) diff --git a/hw/mem/sparse-mem.c b/hw/mem/sparse-mem.c new file mode 100644 index 00000000..e6640eb8 --- /dev/null +++ b/hw/mem/sparse-mem.c @@ -0,0 +1,150 @@ +/* + * A sparse memory device. Useful for fuzzing + * + * Copyright Red Hat Inc., 2021 + * + * Authors: + * Alexander Bulekov <alxndr@bu.edu> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" + +#include "hw/qdev-properties.h" +#include "hw/sysbus.h" +#include "qapi/error.h" +#include "qemu/units.h" +#include "sysemu/qtest.h" +#include "hw/mem/sparse-mem.h" + +#define SPARSE_MEM(obj) OBJECT_CHECK(SparseMemState, (obj), TYPE_SPARSE_MEM) +#define SPARSE_BLOCK_SIZE 0x1000 + +typedef struct SparseMemState { + SysBusDevice parent_obj; + MemoryRegion mmio; + uint64_t baseaddr; + uint64_t length; + uint64_t size_used; + uint64_t maxsize; + GHashTable *mapped; +} SparseMemState; + +typedef struct sparse_mem_block { + uint8_t data[SPARSE_BLOCK_SIZE]; +} sparse_mem_block; + +static uint64_t sparse_mem_read(void *opaque, hwaddr addr, unsigned int size) +{ + SparseMemState *s = opaque; + uint64_t ret = 0; + size_t pfn = addr / SPARSE_BLOCK_SIZE; + size_t offset = addr % SPARSE_BLOCK_SIZE; + sparse_mem_block *block; + + block = g_hash_table_lookup(s->mapped, (void *)pfn); + if (block) { + assert(offset + size <= sizeof(block->data)); + memcpy(&ret, block->data + offset, size); + } + return ret; +} + +static void sparse_mem_write(void *opaque, hwaddr addr, uint64_t v, + unsigned int size) +{ + SparseMemState *s = opaque; + size_t pfn = addr / SPARSE_BLOCK_SIZE; + size_t offset = addr % SPARSE_BLOCK_SIZE; + sparse_mem_block *block; + + if (!g_hash_table_lookup(s->mapped, (void *)pfn) && + s->size_used + SPARSE_BLOCK_SIZE < s->maxsize && v) { + g_hash_table_insert(s->mapped, (void *)pfn, + g_new0(sparse_mem_block, 1)); + s->size_used += sizeof(block->data); + } + block = g_hash_table_lookup(s->mapped, (void *)pfn); + if (!block) { + return; + } + + assert(offset + size <= sizeof(block->data)); + + memcpy(block->data + offset, &v, size); + +} + +static const MemoryRegionOps sparse_mem_ops = { + .read = sparse_mem_read, + .write = sparse_mem_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = false, + }, +}; + +static Property sparse_mem_properties[] = { + /* The base address of the memory */ + DEFINE_PROP_UINT64("baseaddr", SparseMemState, baseaddr, 0x0), + /* The length of the sparse memory region */ + DEFINE_PROP_UINT64("length", SparseMemState, length, UINT64_MAX), + /* Max amount of actual memory that can be used to back the sparse memory */ + DEFINE_PROP_UINT64("maxsize", SparseMemState, maxsize, 10 * MiB), + DEFINE_PROP_END_OF_LIST(), +}; + +MemoryRegion *sparse_mem_init(uint64_t addr, uint64_t length) +{ + DeviceState *dev; + + dev = qdev_new(TYPE_SPARSE_MEM); + qdev_prop_set_uint64(dev, "baseaddr", addr); + qdev_prop_set_uint64(dev, "length", length); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map_overlap(SYS_BUS_DEVICE(dev), 0, addr, -10000); + return &SPARSE_MEM(dev)->mmio; +} + +static void sparse_mem_realize(DeviceState *dev, Error **errp) +{ + SparseMemState *s = SPARSE_MEM(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + + if (!qtest_enabled()) { + error_setg(errp, "sparse_mem device should only be used " + "for testing with QTest"); + return; + } + + assert(s->baseaddr + s->length > s->baseaddr); + + s->mapped = g_hash_table_new(NULL, NULL); + memory_region_init_io(&s->mmio, OBJECT(s), &sparse_mem_ops, s, + "sparse-mem", s->length); + sysbus_init_mmio(sbd, &s->mmio); +} + +static void sparse_mem_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, sparse_mem_properties); + + dc->desc = "Sparse Memory Device"; + dc->realize = sparse_mem_realize; +} + +static const TypeInfo sparse_mem_types[] = { + { + .name = TYPE_SPARSE_MEM, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(SparseMemState), + .class_init = sparse_mem_class_init, + }, +}; +DEFINE_TYPES(sparse_mem_types); diff --git a/hw/mem/trace-events b/hw/mem/trace-events new file mode 100644 index 00000000..8b6b02b5 --- /dev/null +++ b/hw/mem/trace-events @@ -0,0 +1,8 @@ +# See docs/devel/tracing.rst for syntax documentation. + +# pc-dimm.c +mhp_pc_dimm_assigned_slot(int slot) "%d" +# memory-device.c +memory_device_pre_plug(const char *id, uint64_t addr) "id=%s addr=0x%"PRIx64 +memory_device_plug(const char *id, uint64_t addr) "id=%s addr=0x%"PRIx64 +memory_device_unplug(const char *id, uint64_t addr) "id=%s addr=0x%"PRIx64 diff --git a/hw/mem/trace.h b/hw/mem/trace.h new file mode 100644 index 00000000..2f2c9454 --- /dev/null +++ b/hw/mem/trace.h @@ -0,0 +1 @@ +#include "trace/trace-hw_mem.h" |