forked from Qortal/Brooklyn
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
356 lines
9.1 KiB
356 lines
9.1 KiB
// SPDX-License-Identifier: GPL-2.0
/*
 * Bluefield-specific EDAC driver.
 *
 * Copyright (c) 2019 Mellanox Technologies.
 */
|
|
|
#include <linux/acpi.h> |
|
#include <linux/arm-smccc.h> |
|
#include <linux/bitfield.h> |
|
#include <linux/edac.h> |
|
#include <linux/io.h> |
|
#include <linux/module.h> |
|
#include <linux/platform_device.h> |
|
|
|
#include "edac_module.h" |
|
|
|
#define DRIVER_NAME			"bluefield-edac"

/*
 * Mellanox BlueField EMI (External Memory Interface) register definitions.
 *
 * Plain macros are byte offsets that the driver adds to the mapped EMI base;
 * macros with a double underscore are bit fields inside that register.
 */

/* ECC error counters: single-error count and double-error count. */
#define MLXBF_ECC_CNT			0x340
#define MLXBF_ECC_CNT__SERR_CNT		GENMASK(15, 0)
#define MLXBF_ECC_CNT__DERR_CNT		GENMASK(31, 16)

/* Written by the polling routine to clear the error types it reported. */
#define MLXBF_ECC_ERR			0x348
#define MLXBF_ECC_ERR__SECC		BIT(0)
#define MLXBF_ECC_ERR__DECC		BIT(16)

/* Writing START asks the EMI to latch details of the last ECC error. */
#define MLXBF_ECC_LATCH_SEL		0x354
#define MLXBF_ECC_LATCH_SEL__START	BIT(24)

/* Low (ADDR_0) and high (ADDR_1) 32-bit halves of the latched error address. */
#define MLXBF_ERR_ADDR_0		0x358
#define MLXBF_ERR_ADDR_1		0x37c

/* Latched syndrome and flags telling which kind of error was latched. */
#define MLXBF_SYNDROM			0x35c
#define MLXBF_SYNDROM__DERR		BIT(0)
#define MLXBF_SYNDROM__SERR		BIT(1)
#define MLXBF_SYNDROM__SYN		GENMASK(25, 16)

/* Additional latched info; ERR_PRANK is used to select the DIMM slot. */
#define MLXBF_ADD_INFO			0x364
#define MLXBF_ADD_INFO__ERR_PRANK	GENMASK(9, 8)

/* Largest "dimm_per_mc" value accepted at probe time. */
#define MLXBF_EDAC_MAX_DIMM_PER_MC	2
/* Value reported as the error grain of every populated DIMM. */
#define MLXBF_EDAC_ERROR_GRAIN		8
|
|
|
/*
 * Request MLNX_SIP_GET_DIMM_INFO
 *
 * Retrieve information about the DIMM on a certain slot.
 *
 * Call register usage:
 * a0: MLNX_SIP_GET_DIMM_INFO
 * a1: (Memory controller index) << 16 | (DIMM index in memory controller)
 * a2-7: not used.
 *
 * Return status:
 * a0: MLXBF_DIMM_INFO value, laid out as defined below, describing the DIMM.
 * a1-3: not used.
 */
#define MLNX_SIP_GET_DIMM_INFO		0x82000008

/*
 * Format of the SMC response about the memory information.
 * A zero SIZE_GB field is treated by the driver as an empty slot.
 */
#define MLXBF_DIMM_INFO__SIZE_GB	GENMASK_ULL(15, 0)
#define MLXBF_DIMM_INFO__IS_RDIMM	BIT(16)
#define MLXBF_DIMM_INFO__IS_LRDIMM	BIT(17)
#define MLXBF_DIMM_INFO__IS_NVDIMM	BIT(18)
#define MLXBF_DIMM_INFO__RANKS		GENMASK_ULL(23, 21)
#define MLXBF_DIMM_INFO__PACKAGE_X	GENMASK_ULL(31, 24)
|
|
|
struct bluefield_edac_priv { |
|
int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC]; |
|
void __iomem *emi_base; |
|
int dimm_per_mc; |
|
}; |
|
|
|
/*
 * Issue an SMC carrying a single argument.
 *
 * @smc_op:  function identifier, passed as the first SMC argument.
 * @smc_arg: value passed as the second SMC argument; the remaining
 *           arguments are passed as zero.
 *
 * Returns the a0 field of the SMC result.
 */
static u64 smc_call1(u64 smc_op, u64 smc_arg)
{
	struct arm_smccc_res smc_res;

	arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &smc_res);

	return smc_res.a0;
}
|
|
|
/* |
|
* Gather the ECC information from the External Memory Interface registers |
|
* and report it to the edac handler. |
|
*/ |
|
static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, |
|
int error_cnt, |
|
int is_single_ecc) |
|
{ |
|
struct bluefield_edac_priv *priv = mci->pvt_info; |
|
u32 dram_additional_info, err_prank, edea0, edea1; |
|
u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom; |
|
enum hw_event_mc_err_type ecc_type; |
|
u64 ecc_dimm_addr; |
|
int ecc_dimm; |
|
|
|
ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED : |
|
HW_EVENT_ERR_UNCORRECTED; |
|
|
|
/* |
|
* Tell the External Memory Interface to populate the relevant |
|
* registers with information about the last ECC error occurrence. |
|
*/ |
|
ecc_latch_select = MLXBF_ECC_LATCH_SEL__START; |
|
writel(ecc_latch_select, priv->emi_base + MLXBF_ECC_LATCH_SEL); |
|
|
|
/* |
|
* Verify that the ECC reported info in the registers is of the |
|
* same type as the one asked to report. If not, just report the |
|
* error without the detailed information. |
|
*/ |
|
dram_syndrom = readl(priv->emi_base + MLXBF_SYNDROM); |
|
serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom); |
|
derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom); |
|
syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom); |
|
|
|
if ((is_single_ecc && !serr) || (!is_single_ecc && !derr)) { |
|
edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0, |
|
0, 0, -1, mci->ctl_name, ""); |
|
return; |
|
} |
|
|
|
dram_additional_info = readl(priv->emi_base + MLXBF_ADD_INFO); |
|
err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info); |
|
|
|
ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0; |
|
|
|
edea0 = readl(priv->emi_base + MLXBF_ERR_ADDR_0); |
|
edea1 = readl(priv->emi_base + MLXBF_ERR_ADDR_1); |
|
|
|
ecc_dimm_addr = ((u64)edea1 << 32) | edea0; |
|
|
|
edac_mc_handle_error(ecc_type, mci, error_cnt, |
|
PFN_DOWN(ecc_dimm_addr), |
|
offset_in_page(ecc_dimm_addr), |
|
syndrom, ecc_dimm, 0, 0, mci->ctl_name, ""); |
|
} |
|
|
|
static void bluefield_edac_check(struct mem_ctl_info *mci) |
|
{ |
|
struct bluefield_edac_priv *priv = mci->pvt_info; |
|
u32 ecc_count, single_error_count, double_error_count, ecc_error = 0; |
|
|
|
/* |
|
* The memory controller might not be initialized by the firmware |
|
* when there isn't memory, which may lead to bad register readings. |
|
*/ |
|
if (mci->edac_cap == EDAC_FLAG_NONE) |
|
return; |
|
|
|
ecc_count = readl(priv->emi_base + MLXBF_ECC_CNT); |
|
single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count); |
|
double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count); |
|
|
|
if (single_error_count) { |
|
ecc_error |= MLXBF_ECC_ERR__SECC; |
|
|
|
bluefield_gather_report_ecc(mci, single_error_count, 1); |
|
} |
|
|
|
if (double_error_count) { |
|
ecc_error |= MLXBF_ECC_ERR__DECC; |
|
|
|
bluefield_gather_report_ecc(mci, double_error_count, 0); |
|
} |
|
|
|
/* Write to clear reported errors. */ |
|
if (ecc_count) |
|
writel(ecc_error, priv->emi_base + MLXBF_ECC_ERR); |
|
} |
|
|
|
/* Initialize the DIMMs information for the given memory controller. */ |
|
static void bluefield_edac_init_dimms(struct mem_ctl_info *mci) |
|
{ |
|
struct bluefield_edac_priv *priv = mci->pvt_info; |
|
int mem_ctrl_idx = mci->mc_idx; |
|
struct dimm_info *dimm; |
|
u64 smc_info, smc_arg; |
|
int is_empty = 1, i; |
|
|
|
for (i = 0; i < priv->dimm_per_mc; i++) { |
|
dimm = mci->dimms[i]; |
|
|
|
smc_arg = mem_ctrl_idx << 16 | i; |
|
smc_info = smc_call1(MLNX_SIP_GET_DIMM_INFO, smc_arg); |
|
|
|
if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) { |
|
dimm->mtype = MEM_EMPTY; |
|
continue; |
|
} |
|
|
|
is_empty = 0; |
|
|
|
dimm->edac_mode = EDAC_SECDED; |
|
|
|
if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM, smc_info)) |
|
dimm->mtype = MEM_NVDIMM; |
|
else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM, smc_info)) |
|
dimm->mtype = MEM_LRDDR4; |
|
else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM, smc_info)) |
|
dimm->mtype = MEM_RDDR4; |
|
else |
|
dimm->mtype = MEM_DDR4; |
|
|
|
dimm->nr_pages = |
|
FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info) * |
|
(SZ_1G / PAGE_SIZE); |
|
dimm->grain = MLXBF_EDAC_ERROR_GRAIN; |
|
|
|
/* Mem controller for BlueField only supports x4, x8 and x16 */ |
|
switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X, smc_info)) { |
|
case 4: |
|
dimm->dtype = DEV_X4; |
|
break; |
|
case 8: |
|
dimm->dtype = DEV_X8; |
|
break; |
|
case 16: |
|
dimm->dtype = DEV_X16; |
|
break; |
|
default: |
|
dimm->dtype = DEV_UNKNOWN; |
|
} |
|
|
|
priv->dimm_ranks[i] = |
|
FIELD_GET(MLXBF_DIMM_INFO__RANKS, smc_info); |
|
} |
|
|
|
if (is_empty) |
|
mci->edac_cap = EDAC_FLAG_NONE; |
|
else |
|
mci->edac_cap = EDAC_FLAG_SECDED; |
|
} |
|
|
|
static int bluefield_edac_mc_probe(struct platform_device *pdev) |
|
{ |
|
struct bluefield_edac_priv *priv; |
|
struct device *dev = &pdev->dev; |
|
struct edac_mc_layer layers[1]; |
|
struct mem_ctl_info *mci; |
|
struct resource *emi_res; |
|
unsigned int mc_idx, dimm_count; |
|
int rc, ret; |
|
|
|
/* Read the MSS (Memory SubSystem) index from ACPI table. */ |
|
if (device_property_read_u32(dev, "mss_number", &mc_idx)) { |
|
dev_warn(dev, "bf_edac: MSS number unknown\n"); |
|
return -EINVAL; |
|
} |
|
|
|
/* Read the DIMMs per MC from ACPI table. */ |
|
if (device_property_read_u32(dev, "dimm_per_mc", &dimm_count)) { |
|
dev_warn(dev, "bf_edac: DIMMs per MC unknown\n"); |
|
return -EINVAL; |
|
} |
|
|
|
if (dimm_count > MLXBF_EDAC_MAX_DIMM_PER_MC) { |
|
dev_warn(dev, "bf_edac: DIMMs per MC not valid\n"); |
|
return -EINVAL; |
|
} |
|
|
|
emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); |
|
if (!emi_res) |
|
return -EINVAL; |
|
|
|
layers[0].type = EDAC_MC_LAYER_SLOT; |
|
layers[0].size = dimm_count; |
|
layers[0].is_virt_csrow = true; |
|
|
|
mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*priv)); |
|
if (!mci) |
|
return -ENOMEM; |
|
|
|
priv = mci->pvt_info; |
|
|
|
priv->dimm_per_mc = dimm_count; |
|
priv->emi_base = devm_ioremap_resource(dev, emi_res); |
|
if (IS_ERR(priv->emi_base)) { |
|
dev_err(dev, "failed to map EMI IO resource\n"); |
|
ret = PTR_ERR(priv->emi_base); |
|
goto err; |
|
} |
|
|
|
mci->pdev = dev; |
|
mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 | |
|
MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM; |
|
mci->edac_ctl_cap = EDAC_FLAG_SECDED; |
|
|
|
mci->mod_name = DRIVER_NAME; |
|
mci->ctl_name = "BlueField_Memory_Controller"; |
|
mci->dev_name = dev_name(dev); |
|
mci->edac_check = bluefield_edac_check; |
|
|
|
/* Initialize mci with the actual populated DIMM information. */ |
|
bluefield_edac_init_dimms(mci); |
|
|
|
platform_set_drvdata(pdev, mci); |
|
|
|
/* Register with EDAC core */ |
|
rc = edac_mc_add_mc(mci); |
|
if (rc) { |
|
dev_err(dev, "failed to register with EDAC core\n"); |
|
ret = rc; |
|
goto err; |
|
} |
|
|
|
/* Only POLL mode supported so far. */ |
|
edac_op_state = EDAC_OPSTATE_POLL; |
|
|
|
return 0; |
|
|
|
err: |
|
edac_mc_free(mci); |
|
|
|
return ret; |
|
|
|
} |
|
|
|
static int bluefield_edac_mc_remove(struct platform_device *pdev) |
|
{ |
|
struct mem_ctl_info *mci = platform_get_drvdata(pdev); |
|
|
|
edac_mc_del_mc(&pdev->dev); |
|
edac_mc_free(mci); |
|
|
|
return 0; |
|
} |
|
|
|
/* ACPI IDs this driver binds to; the empty entry terminates the table. */
static const struct acpi_device_id bluefield_mc_acpi_ids[] = {
	{ "MLNXBF08", 0 },
	{ }
};
MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids);
|
|
|
static struct platform_driver bluefield_edac_mc_driver = { |
|
.driver = { |
|
.name = DRIVER_NAME, |
|
.acpi_match_table = bluefield_mc_acpi_ids, |
|
}, |
|
.probe = bluefield_edac_mc_probe, |
|
.remove = bluefield_edac_mc_remove, |
|
}; |
|
|
|
module_platform_driver(bluefield_edac_mc_driver); |
|
|
|
MODULE_DESCRIPTION("Mellanox BlueField memory edac driver"); |
|
MODULE_AUTHOR("Mellanox Technologies"); |
|
MODULE_LICENSE("GPL v2");
|
|
|